In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
import numpy as np
import warnings

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder

In [9]:
# Load the two source CSV files (paths are relative to the working directory).
# df1 comes from the "share of disease" file, df2 from the "prevalence" file.
share_of_disease_csv = "mental-and-substance-use-as-share-of-disease.csv"
prevalence_csv = "prevalence-by-mental-and-substance-use-disorder.csv"
df1 = pd.read_csv(share_of_disease_csv)
df2 = pd.read_csv(prevalence_csv)


In [10]:
# Find common columns for merging.
# An order-preserving intersection (df1's column order) is used instead of a
# set intersection, whose iteration order for strings can differ between runs.
common_columns = [col for col in df1.columns if col in df2.columns]
if not common_columns:
    # Fail early with a readable message rather than an obscure merge error.
    raise ValueError("df1 and df2 share no column names, so there is nothing to merge on")

# Merge data: inner join on the shared key columns. In the result, df1's
# columns come first, followed by df2's non-key columns.
merged = pd.merge(df1, df2, on=common_columns)

# Drop unnecessary columns (returns a new frame; `merged` is left untouched).
data = merged.drop(columns=['Code'])

In [11]:
# Rename columns.
# The new names are applied purely by position, so they must match the column
# order produced by the merge (assigning a list of the wrong length raises).
# NOTE(review): pd.merge(df1, df2) places df1's columns before df2's, so the
# third label ('Schizophrenia') lands on df1's first non-key column and
# 'Mental_fitness' lands on df2's last column. Judging by the file names this
# may label the wrong column as the target -- TODO confirm against the CSV headers.
new_column_names = [
    'Country', 'Year', 'Schizophrenia', 'Bipolar_disorder', 'Eating_disorder',
    'Anxiety', 'Drug_usage', 'Depression', 'Alcohol', 'Mental_fitness',
]
data.columns = new_column_names

# Display basic information about the data.
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" after the report).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6840 entries, 0 to 6839
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country           6840 non-null   object 
 1   Year              6840 non-null   int64  
 2   Schizophrenia     6840 non-null   float64
 3   Bipolar_disorder  6840 non-null   float64
 4   Eating_disorder   6840 non-null   float64
 5   Anxiety           6840 non-null   float64
 6   Drug_usage        6840 non-null   float64
 7   Depression        6840 non-null   float64
 8   Alcohol           6840 non-null   float64
 9   Mental_fitness    6840 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 534.5+ KB
None


In [12]:
# Data preprocessing: replace each categorical column with integer labels.
# The fitted encoder stays bound to `le` so the same mapping can be applied to
# new values later on.
# NOTE: the column is overwritten, so re-running this cell on its own would
# re-fit the encoder on the already-encoded integers.
le = LabelEncoder()
categorical_columns = ['Country']
for col in categorical_columns:
    encoded_values = le.fit_transform(data[col])
    data[col] = encoded_values

In [13]:
# Report which columns the frame currently holds, and how many there are.
current_columns = data.columns
column_count = data.shape[1]
print("Current Columns:", current_columns)
print("Number of Columns:", column_count)

Current Columns: Index(['Country', 'Year', 'Schizophrenia', 'Bipolar_disorder',
       'Eating_disorder', 'Anxiety', 'Drug_usage', 'Depression', 'Alcohol',
       'Mental_fitness'],
      dtype='object')
Number of Columns: 10


In [14]:
# Separate the target column (y) from the remaining feature columns (X).
target_column = 'Mental_fitness'
y = data[target_column]
X = data.drop(columns=[target_column])

In [15]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
xtrain, xtest, ytrain, ytest = split_parts

# Fit a seeded random forest regressor (otherwise default settings) on the
# training rows.
rf = RandomForestRegressor(random_state=42)
rf.fit(X=xtrain, y=ytrain)

In [16]:
# Score the fitted model on the same rows it was trained on.
ytrain_pred = rf.predict(xtrain)
mse_train = mean_squared_error(ytrain, ytrain_pred)
r2_train = r2_score(ytrain, ytrain_pred)
rmse_train = np.sqrt(mse_train)  # RMSE is the square root of the MSE

print("Training Set Performance:")
for metric_label, metric_value in (
    ('MSE:', mse_train),
    ('RMSE:', rmse_train),
    ('R2 Score:', r2_train),
):
    print(metric_label, metric_value)

Training Set Performance:
MSE: 0.000602196054841899
RMSE: 0.02453968326694334
R2 Score: 0.9993125634594937


In [17]:
# Score the fitted model on the held-out test rows.
ytest_pred = rf.predict(xtest)
mse_test = mean_squared_error(y_true=ytest, y_pred=ytest_pred)
rmse_test = np.sqrt(mse_test)  # RMSE is the square root of the MSE
r2_test = r2_score(y_true=ytest, y_pred=ytest_pred)

test_report = [
    ('MSE:', mse_test),
    ('RMSE:', rmse_test),
    ('R2 Score:', r2_test),
]
print("\nTesting Set Performance:")
for report_label, report_value in test_report:
    print(report_label, report_value)


Testing Set Performance:
MSE: 0.003485207120089088
RMSE: 0.05903564279390111
R2 Score: 0.9959561016389319


In [18]:
# User input for prediction.
# The country is validated before encoding: LabelEncoder.transform raises an
# opaque "unseen labels" ValueError for any name it was not fitted on, so a
# clearer ValueError is raised here instead. Surrounding whitespace is stripped.
country_name = input('Enter Your country Name:').strip()
if country_name not in set(le.classes_):
    raise ValueError(
        f"Unknown country {country_name!r}: enter a name exactly as it appears in the training data."
    )
country_input = le.transform([country_name])[0]
year_input = int(input("Enter the year:"))
schi_input = float(input("Enter your Schizophrenia rate in %:"))
bipo_dis_input = float(input("Enter your Bipolar disorder rate in %:"))
eat_dis_input = float(input("Enter your Eating disorder rate in %:"))
anx_input = float(input("Enter your Anxiety rate in %:"))
drug_use_input = float(input("Enter your Drug Usage rate in %:"))
depr_input = float(input("Enter your Depression rate in %:"))
alch_input = float(input("Enter your Alcohol Consuming rate per year in %:"))

# Make prediction.
# The model was fitted on a DataFrame, so the single sample is passed as a
# one-row DataFrame carrying the same column names (values listed in the same
# order as X's columns) rather than as a bare nested list.
user_features = pd.DataFrame(
    [[
        country_input,
        year_input,
        schi_input,
        bipo_dis_input,
        eat_dis_input,
        anx_input,
        drug_use_input,
        depr_input,
        alch_input,
    ]],
    columns=X.columns,
)
prediction = rf.predict(user_features)

# NOTE(review): the x10 scaling is kept as-is, but nothing visible here explains
# why a model output is multiplied by 10 and then shown as a percentage -- confirm.
print("\nYour Predicted Mental Fitness is: {}%".format(prediction[0] * 10))


Your Predicted Mental Fitness is: 24.456323082999987%
