In [1]:
import pandas as pd

# Load the dataset
file_path = '/Users/shubhamkumarsingh/Desktop/Python 101 /Projects/Virtual reality management system/DATA.csv'
vr_therapy_data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
print(vr_therapy_data.head())


  APPROVAL  GROUP  AGE(IN_YRS) GENDER QUALIFICATION COLOR BLINDNESS  \
0      Yes    1.0         19.0      F       Student              No   
1      Yes    1.0         21.0      F       Student              No   
2      Yes    1.0         23.0      F       Student              No   
3      Yes    1.0         21.0      F       Student              No   
4      Yes    1.0         20.0      F       Student              No   

  VISUAL IMPAIRMENT DO YOU WEAR SPECS DO YOU BECOME ANXIOUS IN SITUATIONS  \
0                No                No                                  No   
1                No               Yes                                  No   
2                No                No                                 Yes   
3                No                No                                  No   
4                No                No                                 Yes   

  DO YOU TAKE MEDICATIONS FOR ANXIETY  ... YOU WILL HURT YOURSELF.3  \
0                                  No  

In [2]:
# Check for missing values
missing_values = vr_therapy_data.isnull().sum()
print(missing_values)

# Get an overview of data types
data_types = vr_therapy_data.dtypes
print(data_types)


APPROVAL                                                    3
GROUP                                                       4
AGE(IN_YRS)                                                 4
GENDER                                                      4
QUALIFICATION                                               4
                                                           ..
YOU WILL FAINT.3                                            4
YOU WILL FREEZE AND NoT BE ABLE TO GET OFF THE BALCONY.1    4
BEING ON THE BALCONY IS DANGEROUS.1                         4
S2 PO                                                       4
S2 PO.1                                                     4
Length: 132, dtype: int64
APPROVAL                                                     object
GROUP                                                       float64
AGE(IN_YRS)                                                 float64
GENDER                                                       object
QUALIFICATION       

In [3]:
# Fill missing values with the mode for categorical variables and mean for numerical variables
for column in vr_therapy_data.columns:
    if vr_therapy_data[column].dtype == 'object':
        vr_therapy_data[column].fillna(vr_therapy_data[column].mode()[0], inplace=True)
    else:
        vr_therapy_data[column].fillna(vr_therapy_data[column].mean(), inplace=True)

# Verify that there are no more missing values
missing_values_after_imputation = vr_therapy_data.isnull().sum()
print(missing_values_after_imputation)


APPROVAL                                                    0
GROUP                                                       0
AGE(IN_YRS)                                                 0
GENDER                                                      0
QUALIFICATION                                               0
                                                           ..
YOU WILL FAINT.3                                            0
YOU WILL FREEZE AND NoT BE ABLE TO GET OFF THE BALCONY.1    0
BEING ON THE BALCONY IS DANGEROUS.1                         0
S2 PO                                                       0
S2 PO.1                                                     0
Length: 132, dtype: int64


In [4]:
from sklearn.preprocessing import LabelEncoder

# Label encode categorical columns
label_encoders = {}
for column in vr_therapy_data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    vr_therapy_data[column] = le.fit_transform(vr_therapy_data[column])
    label_encoders[column] = le


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Normalize numerical columns
scaler = MinMaxScaler()
vr_therapy_data[vr_therapy_data.select_dtypes(include=['float64']).columns] = scaler.fit_transform(
    vr_therapy_data.select_dtypes(include=['float64'])
)


In [6]:
from sklearn.model_selection import train_test_split

# Split the data into features (X) and target (y)
X = vr_therapy_data.drop('APPROVAL', axis=1)
y = vr_therapy_data['APPROVAL']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
from sklearn.linear_model import LogisticRegression

# Train a logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [9]:
from sklearn.ensemble import RandomForestClassifier

# Train a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


In [10]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Logistic Regression Model Performance:")
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

# Evaluate the Random Forest model
y_pred_rf = rf_model.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
class_report_rf = classification_report(y_test, y_pred_rf)

print("Random Forest Model Performance:")
print(f"Accuracy: {accuracy_rf}")
print(f"Confusion Matrix:\n{conf_matrix_rf}")
print(f"Classification Report:\n{class_report_rf}")


Logistic Regression Model Performance:
Accuracy: 1.0
Confusion Matrix:
[[13]]
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        13

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13

Random Forest Model Performance:
Accuracy: 1.0
Confusion Matrix:
[[13]]
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        13

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13



In [11]:
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

# Apply RandomOverSampler to the training data
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Check the class distribution after resampling
counter = Counter(y_resampled)
print(counter)

# Retrain the Random Forest model with resampled data
rf_model.fit(X_resampled, y_resampled)

# Evaluate the model with the test set
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_conf_matrix = confusion_matrix(y_test, y_test_pred)
test_class_report = classification_report(y_test, y_test_pred)

print("Random Forest Model Performance after Resampling:")
print(f"Accuracy: {test_accuracy}")
print(f"Confusion Matrix:\n{test_conf_matrix}")
print(f"Classification Report:\n{test_class_report}")


Counter({1: 50, 0: 50})
Random Forest Model Performance after Resampling:
Accuracy: 0.9230769230769231
Confusion Matrix:
[[ 0  0]
 [ 1 12]]
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.92      0.96        13

    accuracy                           0.92        13
   macro avg       0.50      0.46      0.48        13
weighted avg       1.00      0.92      0.96        13



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
import joblib

# Save the trained model
joblib.dump(rf_model, 'vr_therapy_model.pkl')


['vr_therapy_model.pkl']

In [18]:
from flask import Flask, request, jsonify

app = Flask(__name__)

# Load the trained model
model = joblib.load('vr_therapy_model.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    prediction = model.predict([data])
    return jsonify({'prediction': prediction[0]})

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (fsevents)
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 15, in <module>
    from ipykernel import kernelapp as app
  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/__init__.py", line 7, in <module>
    from .connect import *
  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/connect.py", line 12, in <module>
    import jupyter_client
  File "/opt/anaconda3/lib/python3.11/site-packages/jupyter_client/__init__.py", line 3, in <module>
    from .asynchronous import AsyncKernelClient
  File "/opt/anaconda3/lib/python3.11/site-packages/jupyter_client/asynchronous/__init__.py", line 1, in <module>
    from .client import AsyncKernelClient  # noqa
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/jupyter_client/asynchronous/client.py", line 11, in <module>
    from ..chann

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# curl -X POST -H "Content-Type: application/json" -d '{"feature1": value1, "feature2": value2, ...}' http://localhost:5000/predict
