## detect the fake profiles in online social networks using Neural Network

In [3]:
pip install tensorflow

Collecting tensorflow
  Using cached tensorflow-2.17.0-cp311-cp311-win_amd64.whl.metadata (3.2 kB)
Collecting tensorflow-intel==2.17.0 (from tensorflow)
  Using cached tensorflow_intel-2.17.0-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tens

In [11]:
import sys
import csv
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

# Function to read datasets
def read_datasets():
    genuine_users = pd.read_csv("data/users.csv")
    fake_users = pd.read_csv("data/fusers.csv")
    x = pd.concat([genuine_users, fake_users])
    y = len(fake_users) * [0] + len(genuine_users) * [1]
    return x, y

# Function for feature engineering
def extract_features(x):
    lang_list = list(enumerate(np.unique(x['lang'])))
    lang_dict = {name: i for i, name in lang_list}
    x['lang_code'] = x['lang'].map(lambda x: lang_dict[x]).astype(int)
    
    feature_columns_to_use = ['statuses_count', 'followers_count', 'friends_count', 'favourites_count', 'listed_count', 'lang_code']
    x = x.loc[:, feature_columns_to_use]
    
    # Standardize the data
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    
    return x

# Function to build CNN model
def build_cnn(input_shape):
    model = Sequential()
    model.add(Conv1D(32, kernel_size=2, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='softmax'))  # Binary classification

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to plot confusion matrix
def plot_confusion_matrix(cm, title='Confusion Matrix', cmap=plt.cm.Blues):
    target_names = ['Fake', 'Genuine']
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Function to plot ROC curve
def plot_roc_curve(y_test, y_pred):
    false_positive_rate, true_positive_rate, _ = roc_curve(y_test, y_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)

    plt.title('Receiver Operating Characteristic')
    plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

# Main pipeline
print("Reading datasets...\n")
x, y = read_datasets()

print("Extracting features...\n")
x = extract_features(x)

# Encode labels and split data
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y = to_categorical(y)  # Convert to categorical for Keras

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Reshape for CNN input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print("Building and training CNN model...\n")
input_shape = (X_train.shape[1], 1)
cnn_model = build_cnn(input_shape)

# Train the model
cnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1)

# # Predictions
# y_pred = cnn_model.predict(X_test)
# y_pred_classes = np.argmax(y_pred, axis=1)
# y_true = np.argmax(y_test, axis=1)

# # Evaluate accuracy
# print('Classification Accuracy on Test dataset: ', accuracy_score(y_true, y_pred_classes))

# # Confusion Matrix
# cm = confusion_matrix(y_true, y_pred_classes)
# print('Confusion matrix, without normalization')
# print(cm)
# plot_confusion_matrix(cm)

# cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
# print('Normalized confusion matrix')
# plot_confusion_matrix(cm_normalized, title='Normalized Confusion Matrix')

# print(classification_report(y_true, y_pred_classes, target_names=['Fake', 'Genuine']))

# # ROC curve
# print("Plotting ROC curve...\n")
# plot_roc_curve(y_true, y_pred_classes)


Reading datasets...

Extracting features...

Building and training CNN model...

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7498 - loss: 0.5755 - val_accuracy: 0.9336 - val_loss: 0.2448
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8979 - loss: 0.3329 - val_accuracy: 0.9336 - val_loss: 0.2267
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9131 - loss: 0.2713 - val_accuracy: 0.9336 - val_loss: 0.2308
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9079 - loss: 0.2907 - val_accuracy: 0.9336 - val_loss: 0.2088
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9160 - loss: 0.2718 - val_accuracy: 0.9336 - val_loss: 0.1991
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9219 - loss: 0.2402 - val_accuracy: 0.9381 - val_loss: 0.1932
Epoch 7/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x27f705db3d0>

In [18]:
# Function to read n rows from input file
def read_n_rows(input_file, n):
    input_data = pd.read_csv(input_file, nrows=n)
    return input_data

# Function to predict for n rows and write to output.csv
def predict_n_rows(model, input_file, output_file, n):
    # Load the input data
    input_data = read_n_rows(input_file, n)
    
    # Extract features from input
    input_data = extract_features(input_data)
    
    # Reshape for CNN input (n rows)
    input_data = input_data.reshape(input_data.shape[0], input_data.shape[1], 1)
    
    # Make predictions
    predictions = model.predict(input_data)
    
    # Convert to class labels (0 or 1)
    predicted_classes = np.argmax(predictions, axis=1)
    
    # Save predictions to output.csv
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Prediction'])  # Add header
        for pred in predicted_classes:
            writer.writerow([pred])
    
    print(f"Predictions for {n} rows saved to {output_file}")

# Example usage
input_file = r"C:\Users\91875\Desktop\smart\Fake-SocialMedia-Detection\Datatransition\input.csv"
output_file = r"C:\Users\91875\Desktop\smart\Fake-SocialMedia-Detection\Datatransition\output.csv"
n = 1  # Specify the number of rows to predict
predict_n_rows(cnn_model, input_file, output_file, n)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Predictions for 1 rows saved to C:\Users\91875\Desktop\smart\Fake-SocialMedia-Detection\Datatransition\output.csv
