In [31]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score, r2_score

In [32]:
df = pd.read_csv('/content/nifty_500.csv')
df.isnull().sum()
df.dropna(inplace=True)
le = LabelEncoder()
df['Industry_encoded'] = le.fit_transform(df['Industry'])
df['Series_encoded'] = le.fit_transform(df['Series'])


In [33]:
df

Unnamed: 0,Company Name,Symbol,Industry,Series,Open,High,Low,Previous Close,Last Traded Price,Change,Percentage Change,Share Volume,Value (Indian Rupee),52 Week High,52 Week Low,365 Day Percentage Change,30 Day Percentage Change,Industry_encoded,Series_encoded
0,3M India Ltd.,3MINDIA,Diversified,EQ,21950.00,21999.00,21126.05,21854.05,21575.00,-279.05,-1.28,4159,8.965357e+07,27800.00,17273.00,-10.18,8.22,7,1
1,Aarti Drugs Ltd.,AARTIDRUGS,Healthcare,EQ,400.50,401.80,394.10,403.85,400.00,-3.85,-0.95,31782,1.262731e+07,749.75,378.00,-42.92,-5.10,11,1
2,Aavas Financiers Ltd.,AAVAS,Financial Services,EQ,1997.10,2004.05,1894.50,2015.45,1943.15,-72.30,-3.59,150704,2.907728e+08,3340.00,1815.00,-25.69,-9.39,9,1
3,ABB India Ltd.,ABB,Capital Goods,EQ,2260.35,2311.50,2260.35,2300.90,2280.00,-20.90,-0.91,97053,2.210935e+08,2487.85,1618.05,27.25,-5.21,1,1
4,Abbott India Ltd.,ABBOTINDIA,Healthcare,EQ,18700.40,19200.00,18605.00,18760.40,19199.80,439.40,2.34,12396,2.346770e+08,23934.45,15514.00,11.61,5.84,11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,Zensar Technolgies Ltd.,ZENSARTECH,Information Technology,EQ,273.15,273.55,268.40,272.10,270.00,-2.10,-0.77,186578,5.046748e+07,587.00,246.65,-12.61,-11.02,12,1
497,ZF Commercial Vehicle Control Systems India Ltd.,ZFCVINDIA,Automobile and Auto Components,EQ,7748.00,7900.00,7525.30,7716.60,7680.00,-36.60,-0.47,12790,9.884624e+07,8780.05,6793.30,-,1.43,0,1
498,Zomato Ltd.,ZOMATO,Consumer Services,EQ,54.15,56.70,52.55,53.85,56.00,2.15,3.99,112078641,6.193466e+09,169.00,50.05,-,-27.86,6,1
499,Zydus Lifesciences Ltd.,ZYDUSLIFE,Healthcare,EQ,356.90,364.05,354.30,357.00,364.00,7.00,1.96,862373,3.100145e+08,651.90,319.00,-,-5.64,11,1


In [46]:
df.isnull()

Unnamed: 0,Company Name,Symbol,Industry,Series,Open,High,Low,Previous Close,Last Traded Price,Change,Percentage Change,Share Volume,Value (Indian Rupee),52 Week High,52 Week Low,365 Day Percentage Change,30 Day Percentage Change,Industry_encoded,Series_encoded,BinaryTarget
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
494,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
495,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
496,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [44]:
df.dropna(inplace = True)

In [34]:
features_regression = ['Open', 'High', 'Low', 'Previous Close', 'Percentage Change', '52 Week High', '52 Week Low', '365 Day Percentage Change', '30 Day Percentage Change']
target_regression = 'Last Traded Price'
df[features_regression] = df[features_regression].apply(pd.to_numeric, errors='coerce')
df[target_regression] = pd.to_numeric(df[target_regression], errors='coerce')
df.dropna(subset=features_regression + [target_regression], inplace=True)
X_regression = df[features_regression]
y_regression = df[target_regression]
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_regression, y_regression, test_size=0.2, random_state=42)


In [35]:
reg_model = LinearRegression()
reg_model.fit(X_train_reg, y_train_reg)
y_pred_reg = reg_model.predict(X_test_reg)
mse = mean_squared_error(y_test_reg, y_pred_reg)
r2 = r2_score(y_test_reg, y_pred_reg)
print(f"Regression - Mean Squared Error: {mse}")
print(f"Regression - R-squared: {r2}")


Regression - Mean Squared Error: 810.3142455970135
Regression - R-squared: 0.9999697245424469


In [36]:
df['Percentage Change'] = pd.to_numeric(df['Percentage Change'], errors='coerce')
df['BinaryTarget'] = df['Percentage Change'].apply(lambda x: 1 if x > 0 else 0)


In [37]:
features_classification = ['Open', 'High', 'Low', 'Previous Close', '52 Week High', '52 Week Low', '365 Day Percentage Change']
target_classification = 'BinaryTarget'
X_classification = df[features_classification].fillna(0)
y_classification = df[target_classification]
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X_classification, y_classification, test_size=0.2, random_state=42)


In [38]:
svm_model = SVC()
svm_model.fit(X_train_clf, y_train_clf)
y_pred_svm = svm_model.predict(X_test_clf)
accuracy_svm = accuracy_score(y_test_clf, y_pred_svm)
precision_svm = precision_score(y_test_clf, y_pred_svm)
recall_svm = recall_score(y_test_clf, y_pred_svm)
f1_svm = f1_score(y_test_clf, y_pred_svm)
print(f"SVM - Accuracy: {accuracy_svm}")
print(f"SVM - Precision: {precision_svm}")
print(f"SVM - Recall: {recall_svm}")
print(f"SVM - F1-score: {f1_svm}")


SVM - Accuracy: 0.5604395604395604
SVM - Precision: 0.5604395604395604
SVM - Recall: 1.0
SVM - F1-score: 0.7183098591549296


In [39]:
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train_reg, y_train_reg)
y_pred_rf_reg = rf_regressor.predict(X_test_reg)
mse_rf = mean_squared_error(y_test_reg, y_pred_rf_reg)
r2_rf = r2_score(y_test_reg, y_pred_rf_reg)
print(f"Random Forest Regression - Mean Squared Error: {mse_rf}")
print(f"Random Forest Regression - R-squared: {r2_rf}")


Random Forest Regression - Mean Squared Error: 1163401.9459077818
Random Forest Regression - R-squared: 0.9565322633510699


In [40]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_clf, y_train_clf)
y_pred_rf_clf = rf_classifier.predict(X_test_clf)

accuracy_rf = accuracy_score(y_test_clf, y_pred_rf_clf)
precision_rf = precision_score(y_test_clf, y_pred_rf_clf)
recall_rf = recall_score(y_test_clf, y_pred_rf_clf)
f1_rf = f1_score(y_test_clf, y_pred_rf_clf)

print(f"Random Forest Classification - Accuracy: {accuracy_rf}")
print(f"Random Forest Classification - Precision: {precision_rf}")
print(f"Random Forest Classification - Recall: {recall_rf}")
print(f"Random Forest Classification - F1-score: {f1_rf}")


Random Forest Classification - Accuracy: 0.5274725274725275
Random Forest Classification - Precision: 0.5606060606060606
Random Forest Classification - Recall: 0.7254901960784313
Random Forest Classification - F1-score: 0.6324786324786323


In [41]:
print("\nComparison of SVM and Random Forest for Classification:")
print(f"SVM Accuracy: {accuracy_svm} | RF Accuracy: {accuracy_rf}")
print(f"SVM F1-score: {f1_svm} | RF F1-score: {f1_rf}")



Comparison of SVM and Random Forest for Classification:
SVM Accuracy: 0.5604395604395604 | RF Accuracy: 0.5274725274725275
SVM F1-score: 0.7183098591549296 | RF F1-score: 0.6324786324786323


In [48]:
from sklearn.metrics import classification_report
print("Classification Report for SVM:")
print(classification_report(y_test_clf, y_pred_svm))
print("\nClassification Report for Random Forest:")
print(classification_report(y_test_clf, y_pred_rf_clf))

Classification Report for SVM:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.56      1.00      0.72        51

    accuracy                           0.56        91
   macro avg       0.28      0.50      0.36        91
weighted avg       0.31      0.56      0.40        91


Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.44      0.28      0.34        40
           1       0.56      0.73      0.63        51

    accuracy                           0.53        91
   macro avg       0.50      0.50      0.49        91
weighted avg       0.51      0.53      0.50        91



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [49]:
from sklearn.metrics import confusion_matrix
cm_svm = confusion_matrix(y_test_clf, y_pred_svm)
print("Confusion Matrix for SVM:")
print(cm_svm)
cm_rf = confusion_matrix(y_test_clf, y_pred_rf_clf)
print("\nConfusion Matrix for Random Forest:")
print(cm_rf)


Confusion Matrix for SVM:
[[ 0 40]
 [ 0 51]]

Confusion Matrix for Random Forest:
[[11 29]
 [14 37]]
