In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from cryptography.fernet import Fernet
import joblib

# Load the dataset
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# this exact file exists; consider a configurable data directory instead.
file_path = r"C:\Users\DELL\Desktop\ai\llllllllll\dataset of sensitive and not sensitive data.xlsx"
data = pd.read_excel(file_path)

# Data Preprocessing
# Features: cast every entry of 'Data' to str so non-string cells can be fed
# to the TF-IDF vectorizer.
X = data['Data'].astype(str)
# Targets: 1 for the exact label 'Sensitive', 0 for every other value.
y = data['Label'].apply(lambda x: 1 if x == 'Sensitive' else 0)

# Split data into training and testing sets (20% held out, fixed seed so the
# split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Vectorize the text data using TF-IDF.
# The vectorizer is fitted on the training split only; the test split is just
# transformed with the already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save the model and vectorizer for later use (predict_and_encrypt reloads
# both files from the current working directory)
joblib.dump(model, "sensitivity_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# Encryption setup
# NOTE(review): a fresh key is generated every time this code runs and nothing
# shown here stores it, so tokens produced with an earlier key cannot be
# decrypted with the new one. Persist/load the key via a secrets store if the
# ciphertext must remain readable later.
key = Fernet.generate_key()
cipher_suite = Fernet(key)

# Function to test new input
def predict_and_encrypt(text):
    """Classify ``text`` and encrypt it when it is predicted to be sensitive.

    The classifier and TF-IDF vectorizer are reloaded from
    "sensitivity_model.pkl" and "tfidf_vectorizer.pkl" on every call, and the
    module-level ``cipher_suite`` performs the encryption. The outcome is
    printed in both cases.

    Args:
        text: Input to classify. Non-string values are converted with
            ``str()``, matching the ``astype(str)`` applied to the training
            data.

    Returns:
        The encrypted token (``bytes``) when the text is predicted as
        Sensitive, otherwise ``None``.
    """
    # SECURITY: joblib.load unpickles these files, which can execute arbitrary
    # code -- only load artifacts that come from a trusted source.
    model = joblib.load("sensitivity_model.pkl")
    vectorizer = joblib.load("tfidf_vectorizer.pkl")

    # Coerce once up front so both the vectorizer and .encode() below receive
    # a str even when the caller passes a number or other object.
    text = str(text)

    # Preprocess the text input (transform expects an iterable of documents)
    text_tfidf = vectorizer.transform([text])

    # Predict sensitivity
    prediction = model.predict(text_tfidf)[0]

    if prediction != 1:
        print("The text is predicted as Non-Sensitive.")
        return None

    print("The text is predicted as Sensitive.")
    encrypted_text = cipher_suite.encrypt(text.encode())
    print("Encrypted text (to be sent to the cloud):", encrypted_text)
    # Here you could add functionality to send to the cloud
    # Hand the token back so the caller can actually transmit or store it.
    return encrypted_text

# Test the function with an input
# input() blocks until a line is typed, so this cell cannot run unattended.
test_text = input("Enter text to test sensitivity: ")
# Prints the predicted class and, for a Sensitive prediction, the ciphertext.
predict_and_encrypt(test_text)


Accuracy: 0.9257425742574258
              precision    recall  f1-score   support

           0       0.98      0.87      0.92        98
           1       0.89      0.98      0.93       104

    accuracy                           0.93       202
   macro avg       0.93      0.92      0.93       202
weighted avg       0.93      0.93      0.93       202



Enter text to test sensitivity:  password


The text is predicted as Sensitive.
Encrypted text (to be sent to the cloud): b'gAAAAABnKhmhWseLSxcI88pgIkwCcs1E7JbGQuxln6v4Mkmh__rpUYH2u5qeqZXrRKo6YctFI3azNeBpiuhZObSUhGDCor3OAQ=='
