<a href="https://colab.research.google.com/github/Farooqbasha008/Analysis-of-Emotional-Intensity-of-tweets/blob/main/models/Statistical_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score

# Load the pre-split training and evaluation sets from disk.
train_df = pd.read_csv('trained.csv')
test_df = pd.read_csv('tested.csv')

# Discard the 'Unnamed: 0' column from both frames
# (presumably a row index written out by an earlier to_csv call -- confirm).
train_df = train_df.drop(columns=['Unnamed: 0'])
test_df = test_df.drop(columns=['Unnamed: 0'])

# Features are the first five remaining columns; the target is 'score'.
# NOTE(review): assumes 'score' is not itself one of the first five
# columns, otherwise the target leaks into the features -- confirm.
X_train, y_train = train_df.iloc[:, :5], train_df['score']
X_test, y_test = test_df.iloc[:, :5], test_df['score']

# Tune the Ridge regularisation strength by cross-validation, then fit the
# final model with the selected alpha and predict on the test features.
#
# A default-alpha Ridge used to be fitted and used for prediction here first,
# but both `model` and `y_pred` were overwritten below before anything read
# them, so that baseline fit was dead work and has been removed.

# Define cross-validation method: 10 folds repeated 3 times, with a fixed
# seed so the fold assignment is reproducible between runs.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Define RidgeCV model with alpha selection over 0.01, 0.02, ..., 0.99,
# choosing the alpha with the best (least negative) mean absolute error.
model_cv = RidgeCV(alphas=np.arange(0.01, 1, 0.01), cv=cv, scoring='neg_mean_absolute_error')
model_cv.fit(X_train, y_train)

# Get the chosen alpha value
alpha = model_cv.alpha_

# Define Ridge model with chosen alpha and fit it on the full training set
model = Ridge(alpha=alpha)
model.fit(X_train, y_train)

# Predict using the Ridge model with chosen alpha
y_pred = model.predict(X_test)

# Calculate evaluation metrics for the tuned model's test-set predictions

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

# Calculate MSE
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

# Calculate RMSE
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Calculate R2 score
r2 = r2_score(y_test, y_pred)
print("R-squared (R2):", r2)

# Calculate RMSLE
# NOTE(review): mean_squared_log_error raises ValueError when its inputs fall
# outside the log domain, and Ridge predictions are not bounded below --
# confirm the predictions stay in range for this data.
rmsle = np.sqrt(mean_squared_log_error(y_test, y_pred))
print("Root Mean Squared Logarithmic Error (RMSLE):", rmsle)

# Calculate MAPE
# A percentage error is undefined where the true score is exactly 0; dividing
# by those targets made the whole mean come out as inf. Average only over the
# rows with a non-zero target, and report NaN when there are no such rows.
y_true_arr = np.asarray(y_test, dtype=float)
y_pred_arr = np.asarray(y_pred, dtype=float)
nonzero_mask = y_true_arr != 0
if nonzero_mask.any():
    relative_errors = (y_true_arr[nonzero_mask] - y_pred_arr[nonzero_mask]) / y_true_arr[nonzero_mask]
    mape = np.mean(np.abs(relative_errors)) * 100
else:
    mape = np.nan
print("Mean Absolute Percentage Error (MAPE):", mape)


Mean Absolute Error (MAE): 0.0510570074666143
Mean Squared Error (MSE): 0.011451054954837744
Root Mean Squared Error (RMSE): 0.10700960216185156
R-squared (R2): 0.7095968416530064
Root Mean Squared Logarithmic Error (RMSLE): 0.07229241877765256
Mean Absolute Percentage Error (MAPE): inf
