<a href="https://colab.research.google.com/github/MrEchoFi/Bcrypt_lib-based-defensive-brute-force-attacks-and-credential-stuffing-signup-login-page-/blob/main/ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the laptop price dataset from its absolute path and preview the first rows.
csv_path = "/laptop_prices.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,Ram,OS,Weight,Price_euros,Screen,ScreenW,...,RetinaDisplay,CPU_company,CPU_freq,CPU_model,PrimaryStorage,SecondaryStorage,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model
0,Apple,MacBook Pro,Ultrabook,13.3,8,macOS,1.37,1339.69,Standard,2560,...,Yes,Intel,2.3,Core i5,128,0,SSD,No,Intel,Iris Plus Graphics 640
1,Apple,Macbook Air,Ultrabook,13.3,8,macOS,1.34,898.94,Standard,1440,...,No,Intel,1.8,Core i5,128,0,Flash Storage,No,Intel,HD Graphics 6000
2,HP,250 G6,Notebook,15.6,8,No OS,1.86,575.0,Full HD,1920,...,No,Intel,2.5,Core i5 7200U,256,0,SSD,No,Intel,HD Graphics 620
3,Apple,MacBook Pro,Ultrabook,15.4,16,macOS,1.83,2537.45,Standard,2880,...,Yes,Intel,2.7,Core i7,512,0,SSD,No,AMD,Radeon Pro 455
4,Apple,MacBook Pro,Ultrabook,13.3,8,macOS,1.37,1803.6,Standard,2560,...,Yes,Intel,3.1,Core i5,256,0,SSD,No,Intel,Iris Plus Graphics 650


In [None]:
# Count rows per manufacturer (value_counts sorts by frequency, descending)
# and keep the first five entries.
brand_counts = df['Company'].value_counts()
top_brands = brand_counts.iloc[:5]
print("Top 5 laptop brands:\n", top_brands)


In [None]:
# Mean Price_euros per Company, ordered from lowest to highest average.
price_by_brand = df.groupby('Company')['Price_euros']
avg_price = price_by_brand.mean().sort_values()

# Index labels (company names) of the extreme averages.
priciest_brand = avg_price.idxmax()
cheapest_brand = avg_price.idxmin()

print("\nAverage price per brand:\n", avg_price)
print("Highest avg price brand:", priciest_brand, "->", avg_price.loc[priciest_brand])
print("Lowest avg price brand:", cheapest_brand, "->", avg_price.loc[cheapest_brand])

In [None]:
# Pairwise correlation matrix over the selected numeric columns,
# then the Price_euros column of that matrix ranked from strongest positive down.
numeric_cols = ['Price_euros', 'CPU_freq', 'Ram', 'Inches', 'Weight']
corr = df[numeric_cols].corr()
price_corr = corr['Price_euros'].sort_values(ascending=False)
print("\nCorrelation with Price_euros:\n", price_corr)


In [None]:
# Derived feature: element-wise sum of the primary and secondary storage columns.
# This adds a column to df in place.
# NOTE(review): the feature sets built in the later cells visible here select
# 'PrimaryStorage', not 'StorageTotal' — confirm whether that is intended.
df['StorageTotal'] = df['PrimaryStorage'].add(df['SecondaryStorage'])
print("\nNew feature 'StorageTotal' created.")

In [None]:
# Regression inputs: selected feature columns, with Price_euros as the target.
feature_cols = ['Ram', 'Inches', 'CPU_freq', 'PrimaryStorage', 'GPU_company']

# .copy() gives X its own data. Without it, the column assignment below writes
# into a frame derived from df, which raises SettingWithCopyWarning on
# pandas < 3 and relies on view-vs-copy semantics.
X = df[feature_cols].copy()
y = df['Price_euros']

# Replace the GPU vendor strings with integer codes so the estimators can use them.
# NOTE(review): integer codes impose an arbitrary ordering on the vendors, which a
# linear model will treat as numeric; one-hot encoding may be more appropriate.
X['GPU_company'] = LabelEncoder().fit_transform(X['GPU_company'])


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
regression_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = regression_split


In [None]:

# Fit a linear regression on the training split and report R^2 on the held-out split.
linreg = LinearRegression().fit(X_train, y_train)
y_pred_lr = linreg.predict(X_test)
lr_r2 = r2_score(y_test, y_pred_lr)
print("\nLinear Regression R2:", lr_r2)


In [None]:
# Fit a random forest regressor (seeded for repeatability) on the training split
# and report R^2 on the held-out split.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
rf_r2 = r2_score(y_test, y_pred_rf)
print("Random Forest Regressor R2:", rf_r2)


In [None]:
# Compare held-out R^2 of both regressors; the forest wins only if strictly
# better, so a tie resolves to linear regression.
r2_forest = r2_score(y_test, y_pred_rf)
r2_linear = r2_score(y_test, y_pred_lr)
if r2_forest > r2_linear:
    best_reg_model = "Random Forest Regressor"
else:
    best_reg_model = "Linear Regression"
print("Best regression model:", best_reg_model)


In [None]:
# Classification inputs: predict TypeName from four numeric columns.
cls_feature_cols = ['Inches', 'Ram', 'PrimaryStorage', 'Weight']
X_cls = df[cls_feature_cols]
y_cls = df['TypeName']

# NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the
# regression split. Re-running the regression scoring cells after this point
# would score against the classification labels.
classification_split = train_test_split(X_cls, y_cls, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = classification_split


In [None]:
# Fit a seeded random forest classifier on the current training split and
# predict labels for the held-out rows.
rf_cls = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_cls = rf_cls.predict(X_test)

In [None]:
# Held-out accuracy of the classifier.
cls_accuracy = accuracy_score(y_test, y_pred_cls)
print("\nClassification Accuracy:", cls_accuracy)

# Label each importance score with its feature column, then rank descending.
importances = pd.Series(rf_cls.feature_importances_, index=X_cls.columns)
ranked_importances = importances.sort_values(ascending=False)
print("Feature importances:\n", ranked_importances)

# Only one classifier is trained in the cells visible here, so this line is a
# fixed message rather than the outcome of a comparison.
print("Best classification model: Random Forest Classifier")