# Psychometric Profiling of Online Gamers from Anxiety and Behavioural Patterns

## **Import Libraries**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,precision_score,recall_score,f1_score

## **Uploading Data**

In [None]:
# Load the gaming-study survey CSV; the file is decoded as ISO-8859-1 instead of the UTF-8 default.
df = pd.read_csv('Datasets/GamingStudy_data.csv', encoding = 'ISO-8859-1')

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Summarize column dtypes and non-null counts of the raw data.
df.info()

# **EDA**

In [None]:
# Interactive scatter plot with the "Hours" column on x and the "Work" column on y.
px.scatter(df, x="Hours", y="Work")


# **Data Preparation and Cleaning**

In [None]:
# Frequency of each distinct value in 'highestleague'.
df['highestleague'].value_counts()

In [None]:
# Remove the 'highestleague' and 'S. No.' columns from df in place.
df.drop(columns = ['highestleague','S. No.'],inplace=True)

In [None]:
# Re-inspect dtypes and non-null counts after the two columns were dropped.
df.info()

Check For Missing Values

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Per-column count of missing values.
ms_v = df.isnull().sum()

# Two-column frame: the column name and its missing-value count.
ms_v_df = ms_v.rename_axis('Columns').reset_index(name='Missing Values')

# Single-row frame (row 'Missing Values', one column per feature) so the
# counts can be drawn as a one-row heatmap.
ms_v_heatmap = ms_v_df.set_index('Columns').T

sns.heatmap(ms_v_heatmap, cmap='viridis', annot=True)
plt.title('Missing Values Heatmap')
plt.show()

## Data Cleaning

In [None]:
from sklearn.impute import SimpleImputer

In [None]:
# Names of the SPIN item columns: SPIN1 through SPIN17.
num_col = [f"SPIN{item}" for item in range(1, 18)]

In [None]:
# Show the generated SPIN column names.
print(num_col)

In [None]:
# Further numeric columns that are imputed together with the SPIN items.
additional_cols = ['Hours','streams','Narcissism','SPIN_T']

In [None]:
# Mean imputer: each NaN is replaced by the mean of its column.
numerical_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

# Append only the names that are not listed yet, so re-running this cell
# does not add duplicate column names to num_col.
num_col.extend([col for col in additional_cols if col not in num_col])
print(num_col)

In [None]:
# Fill missing numeric values with the column means. One fit_transform call
# covers every column in num_col, so no per-column loop is needed (the loop
# variable was never used and each pass redid the whole column set).
df[num_col] = numerical_imputer.fit_transform(df[num_col])


In [None]:
# Re-count missing values per column after the numeric imputation.
df.isnull().sum()

Imputing Missing Categorical Values with the Mode

In [None]:
# Columns whose gaps are filled with their most frequent value.
categ_cols = [
    'GADE', 'League', 'Work', 'Degree',
    'Reference', 'accept', 'Residence_ISO3', 'Birthplace_ISO3',
]

# Mode imputer: each NaN is replaced by the most frequent value of its column.
categorical_imputer = SimpleImputer(strategy='most_frequent', missing_values=np.nan)

In [None]:
# Fill missing categorical values with each column's most frequent value.
# One fit_transform call covers every column in categ_cols, so no per-column
# loop is needed (the loop variable was never used).
df[categ_cols] = categorical_imputer.fit_transform(df[categ_cols])

In [None]:
# Re-count missing values per column after the categorical imputation.
df.isnull().sum()

In [None]:
# Per-column count of missing values, recomputed on the current frame.
ms_v = df.isnull().sum()

# Two-column frame: the column name and its missing-value count.
ms_v_df = ms_v.rename_axis('Columns').reset_index(name='Missing Values')

# Single-row frame (row 'Missing Values', one column per feature) so the
# counts can be drawn as a one-row heatmap.
ms_v_heatmap = ms_v_df.set_index('Columns').T

# Wide, short canvas to suit the one-row heatmap.
plt.figure(figsize=(12, 2))
sns.heatmap(ms_v_heatmap, annot=True)
plt.title('Missing Values Heatmap')
plt.show()

**Removing Duplicates**

In [None]:
# Number of rows that are exact duplicates of an earlier row.
duplicated_counts=df.duplicated().sum()
duplicated_counts

In [None]:
# Drop duplicate rows in place, then re-count duplicates to confirm none remain.
# NOTE: the row index is not reset here, so it may contain gaps afterwards.
df.drop_duplicates(inplace=True)
df.duplicated().sum()

Encoding Categorical Columns

In [None]:
# Check dtypes to see which columns still hold non-numeric (object) data.
df.info()

In [None]:
# Sub-frame holding only the object-dtype columns; display their names.
object_cols = df.select_dtypes(include='object')
object_cols.columns

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column.
# Each column gets its own fitted encoder, stored in label_encoders under the
# column name. A single shared encoder is overwritten by every fit, which
# loses the category-to-integer mapping of all columns but the last and makes
# it impossible to encode new data consistently or to decode predictions.
label_encoders = {}
le = LabelEncoder()  # kept for backward compatibility; ends up as the last column's encoder
for col in object_cols:  # iterating a DataFrame yields its column names
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
df.info()

# Using the dendrogram to find the optimal number of clusters


In [None]:
import scipy.cluster.hierarchy as sch


In [None]:
# Build the Ward-linkage hierarchy over all rows of df, then draw it.
ward_links = sch.linkage(df, method='ward')
dendrogram = sch.dendrogram(ward_links)

plt.title('Dendrogram')
plt.xlabel('Gamers')
plt.ylabel('Euclidean distances')
plt.show()

# **Training the Hierarchical Clustering model on the dataset**

In [None]:
from sklearn.cluster import AgglomerativeClustering

# Agglomerative clustering into three groups using Ward linkage.
hc = AgglomerativeClustering(linkage='ward', n_clusters=3)

# Fit on the whole frame and keep the cluster id assigned to each row.
labels_hc = hc.fit(df).labels_

In [None]:
# Store each row's cluster id in a new 'Clusters_Label' column and preview the result.
df['Clusters_Label'] = labels_hc
df.head()

In [None]:
# Confirm the new 'Clusters_Label' column is present alongside the features.
df.info()

In [None]:
# Features: every column except the cluster label. Target: the cluster label.
X = df.drop(columns = ['Clusters_Label'])
y = df['Clusters_Label']

In [None]:
# Distinct cluster ids present in the target.
labels = y.unique()
labels

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=42 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# **Model Building**

In [None]:
# Module-level score histories: calculateMetrics appends one entry to each
# list for every model it evaluates.
precision, recall, fscore, accuracy = [], [], [], []

In [None]:
def calculateMetrics(algorithm, testY, predict):
    """Score one classifier, record the scores, and plot its confusion matrix.

    Accuracy plus macro-averaged precision, recall and F1 (all in percent)
    are appended to the module-level lists ``accuracy``, ``precision``,
    ``recall`` and ``fscore`` and printed. A classification report is printed
    and a confusion-matrix heatmap is shown.

    Args:
        algorithm: Display name used as the prefix of every printed line and
            in the plot title.
        testY: True class labels (array-like, convertible to int).
        predict: Predicted class labels (array-like, convertible to int).
    """
    # np.asarray lets plain lists be passed as well as arrays or Series.
    testY = np.asarray(testY).astype('int')
    predict = np.asarray(predict).astype('int')

    p = precision_score(testY, predict, average='macro') * 100
    r = recall_score(testY, predict, average='macro') * 100
    f = f1_score(testY, predict, average='macro') * 100
    a = accuracy_score(testY, predict) * 100

    accuracy.append(a)
    precision.append(p)
    recall.append(r)
    fscore.append(f)

    print(algorithm + ' Accuracy    : ' + str(a))
    print(algorithm + ' Precision   : ' + str(p))
    print(algorithm + ' Recall      : ' + str(r))
    print(algorithm + ' FSCORE      : ' + str(f))

    # Use every class that occurs in the truth OR the predictions. Taking the
    # classes from testY alone breaks the report (target_names length
    # mismatch) and mislabels the matrix axes whenever the model predicts a
    # class that is absent from testY.
    class_ids = sorted(set(testY.tolist()) | set(predict.tolist()))
    labels = [str(class_id) for class_id in class_ids]

    # Passing labels= keeps the report rows aligned with target_names.
    report = classification_report(testY, predict, labels=class_ids, target_names=labels)
    print('\n', algorithm + " Classification Report\n", report)

    # Rows are true classes, columns are predicted classes, both in class_ids order.
    conf_matrix = confusion_matrix(testY, predict, labels=class_ids)
    plt.figure(figsize=(5, 5))
    ax = sns.heatmap(conf_matrix, xticklabels=labels, yticklabels=labels, annot=True, cmap="Blues", fmt="g")
    # Explicit y-limits of [0, n]; tick labels stay attached to their rows.
    ax.set_ylim([0, len(labels)])
    plt.title(algorithm + " Confusion Matrix")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.show()


In [None]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svc_model_path = 'model/SVC.pkl'

# Reuse a previously saved model when one exists.
# NOTE: the cached model is used as-is; delete the file to force retraining
# after the data, the preprocessing or the cluster labels change. joblib
# files are pickles, so only load files this notebook produced.
if os.path.exists(svc_model_path):
    svc = joblib.load(svc_model_path)
    print("SVC Model loaded successfully.")
else:
    # Train an SVC with a polynomial kernel (other kernels such as 'rbf' can be tried here).
    svc = SVC(kernel='poly')
    svc.fit(X_train_scaled, y_train)

    # Create the output folder first; joblib.dump fails if it does not exist.
    os.makedirs(os.path.dirname(svc_model_path), exist_ok=True)
    joblib.dump(svc, svc_model_path)
    print("SVC Model saved successfully.")

# Evaluate on the held-out split.
predict = svc.predict(X_test_scaled)
calculateMetrics("SVC Classifier", y_test, predict)


In [None]:
import os
import pickle

import joblib
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.preprocessing import StandardScaler

# Standardize the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model_path = 'model/LGBMClassifier.pkl'

# Create the output folder first; joblib.dump fails if it does not exist.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Persist the fitted scaler so new data can be scaled the same way later.
joblib.dump(scaler, 'model/standard_scaler.pkl')

# Reuse a previously saved model when one exists.
# NOTE: the cached model is used as-is; delete the file to force retraining
# after the data, the preprocessing or the cluster labels change. joblib
# files are pickles, so only load files this notebook produced.
if os.path.exists(model_path):
    lgbm = joblib.load(model_path)
    print("LGBMClassifier model loaded successfully.")
else:
    lgbm = LGBMClassifier()
    lgbm.fit(X_train_scaled, y_train)

    joblib.dump(lgbm, model_path)
    print("LGBMClassifier model saved successfully.")

predict = lgbm.predict(X_test_scaled)

# Argument order is (name, true labels, predictions). Passing them the other
# way round swaps precision with recall and transposes the confusion matrix.
calculateMetrics("LGBM Classifier", y_test, predict)

In [None]:
# Load the rows to classify and display them.
# NOTE(review): presumably already encoded and holding the same columns as X; verify.
test = pd.read_csv('Datasets/test.csv')
test

In [None]:
# Scale the new rows with the already-fitted scaler (transform only, no refit).
test_scaled = scaler.transform(test)


In [None]:
# Predict a cluster label for each scaled row with the LightGBM model and display the result.
predict = lgbm.predict(test_scaled)
predict

In [None]:
# Store the predicted labels next to the input rows and display the frame.
# NOTE(review): 'Predications' looks like a typo for 'Predictions'; the name is
# left unchanged in case anything downstream reads this column.
test['Predications'] = predict
test