In [None]:
import os
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor

In [None]:
# Location of the input CSV.  The path can be overridden through the
# SEC_DATA_CSV environment variable so the notebook also runs on machines
# other than the author's; when the variable is unset the original
# location is used unchanged.
file_path = os.environ.get("SEC_DATA_CSV", r"C:\Users\mdmai\Downloads\sec_data.csv")

# Read the whole CSV into memory and preview the first rows.
df = pd.read_csv(file_path)
df.head()

In [None]:
# (rows, columns) of the frame loaded from the CSV.
df.shape

In [None]:
# Summary statistics of the loaded frame, using pandas' describe() defaults.
df.describe()


In [None]:
from sklearn.model_selection import train_test_split

# Seed NumPy's global generator, then cut the whole frame (target column
# still included) into an 80 % training part and a 20 % test part.
np.random.seed(0)
df_train, df_test = train_test_split(
    df,
    train_size=0.8,
    test_size=0.2,
    random_state=100,
)

In [None]:
# Preview the first rows of the training part of the split.
df_train.head()

In [None]:
# (rows, columns) of the training part; it still contains the Price column here.
df_train.shape

In [None]:
# Separate the target from the features without mutating df_train.
# Selecting / dropping (instead of popping the column in place) keeps the
# cell idempotent: it can be re-run without a KeyError, and X_train is a
# new frame rather than an alias of df_train.
y_train = df_train['Price']
X_train = df_train.drop(columns='Price')

In [None]:
# Ordinary least-squares baseline on the training features; fit() returns
# the estimator itself, which is shown as the cell output.
lm = LinearRegression().fit(X_train, y_train)
lm

In [None]:
# Recursive feature elimination down to five features, ranking them with a
# plain linear regression.
estimator = LinearRegression()
rfe = RFE(estimator=estimator, n_features_to_select=5).fit(X_train, y_train)

In [None]:
# Frequency of each Brand value, as a table and as a bar chart.
# The table has to be printed explicitly: only the last expression of a cell
# is displayed, so a bare value_counts() in the middle would be discarded.
brand_counts = df['Brand'].value_counts()
print(brand_counts)

# Wide figure so the per-brand bars do not overlap.
fig_dims = (20, 6)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(x="Brand", data=df, ax=ax)
# Label the figure so it can be read on its own; the trailing semicolon
# suppresses the textual repr of the return value.
ax.set(title="Number of rows per Brand", xlabel="Brand", ylabel="Count");

In [None]:
# Target is the Price column; the features are every other column of df.
y = df['Price']
X = df.drop(columns='Price')

# 80/20 hold-out split.  This rebinds X_train / y_train, replacing the
# values assigned from df_train in the earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# (rows, columns) of df_test.  Note that df_test comes from the first
# train_test_split call (random_state=100) and still contains Price; it is
# not the X_test / y_test pair produced by the split with random_state=42.
df_test.shape

In [None]:
# L1-regularised linear model (alpha=0.1): fit on the training split, then
# predict the held-out rows.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred = lasso.predict(X_test)

In [None]:
# Score the Lasso predictions (y_pred) against the held-out targets.
mse, mae, r2 = (
    score(y_test, y_pred)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)

In [None]:
# List every Lasso prediction next to the true price of the same test row.
# The loop iterates over the predictions computed in this cell, so the
# listing does not depend on a y_pred left behind by another cell.
predicted_values = lasso.predict(X_test)

for i, (prediction, actual) in enumerate(zip(predicted_values, y_test)):
    # i is zero-based; the printed row number starts at 1.
    print(f" {i+1}. Predicted price: {prediction:.2f}, Actual price: {actual:.2f}")

In [None]:
# Random forest of 100 trees with a fixed seed: fit on the training split,
# then predict the held-out rows.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred1 = rf.predict(X_test)

In [None]:
# Score the random-forest predictions (y_pred1) against the held-out targets.
mse, mae, r2 = (
    score(y_test, y_pred1)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)

In [None]:
# Pair each random-forest prediction with the true price of the same test
# row and number the pairs from 1.
numbered_pairs = enumerate(zip(y_pred1, y_test), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
from sklearn.linear_model import Ridge

In [None]:

# L2-regularised linear model (alpha=0.1): fit on the training split, then
# predict the held-out rows.
ridge = Ridge(alpha=0.1).fit(X_train, y_train)
y_pred2 = ridge.predict(X_test)


In [None]:
# Score the Ridge predictions (y_pred2) against the held-out targets.
mse, mae, r2 = (
    score(y_test, y_pred2)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)


In [None]:
# Pair each Ridge prediction with the true price of the same test row and
# number the pairs from 1.
numbered_pairs = enumerate(zip(y_pred2, y_test), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
from sklearn.svm import SVR

In [None]:
# Support-vector regression with an RBF kernel: fit on the training split,
# then predict the held-out rows.
svr = SVR(kernel='rbf').fit(X_train, y_train)
y_pred3 = svr.predict(X_test)

In [None]:
# Score the SVR predictions (y_pred3) against the held-out targets.
mse, mae, r2 = (
    score(y_test, y_pred3)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)

In [None]:
# Pair each SVR prediction with the true price of the same test row and
# number the pairs from 1.
numbered_pairs = enumerate(zip(y_pred3, y_test), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Single decision tree with a fixed seed: fit on the training split, then
# predict the held-out rows.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred4 = dt.predict(X_test)

In [None]:
# Score the decision-tree predictions (y_pred4) against the held-out targets.
mse, mae, r2 = (
    score(y_test, y_pred4)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)

In [None]:
# Pair each decision-tree prediction with the true price of the same test
# row and number the pairs from 1.
numbered_pairs = enumerate(zip(y_pred4, y_test), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
# Gradient-boosted trees with a fixed seed: fit on the training split, then
# predict the held-out rows.
gb = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
y_pred5 = gb.predict(X_test)

In [None]:
# Score the gradient-boosting predictions (y_pred5) against the held-out targets.
mse, r2, mae = (
    score(y_test, y_pred5)
    for score in (mean_squared_error, r2_score, mean_absolute_error)
)

# One metric per output line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Mean Absolute Error: {mae:.2f}",
    f"R^2 Score: {r2:.2f}",
    sep="\n",
)

In [None]:
# Pair each gradient-boosting prediction with the true price of the same
# test row and number the pairs from 1.
numbered_pairs = enumerate(zip(y_pred5, y_test), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
# Average the predictions of ten independently seeded random forests.
#
# The forests are fitted on the training split only and the metrics are
# computed on the held-out split, so the printed numbers measure how the
# ensemble generalises rather than how well it reproduces rows it was
# trained on.  Every forest gets an explicit random_state, which makes the
# cell reproducible on its own.

# Train the ensemble members.  A dedicated loop name is used so the single
# forest bound to `rf` by the earlier cell is not overwritten.
models = []
for seed in range(10):
    member = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=seed)
    member.fit(X_train, y_train)
    models.append(member)

# Per-model predictions for every row of X.  ensemble_pred therefore stays
# aligned with y, which the listing cell that follows zips it with.
predictions = [model.predict(X) for model in models]

# Combine the predictions using the average
ensemble_pred = np.mean(predictions, axis=0)

# Averaged predictions restricted to the held-out rows, used for scoring.
ensemble_test_pred = np.mean([model.predict(X_test) for model in models], axis=0)

# Evaluate the ensemble model on data it was not fitted on
mse = mean_squared_error(y_test, ensemble_test_pred)
mae = mean_absolute_error(y_test, ensemble_test_pred)
r2 = r2_score(y_test, ensemble_test_pred)

# Print the evaluation metrics
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

In [None]:
# Pair each averaged ensemble prediction with the corresponding entry of y
# and number the pairs from 1.
numbered_pairs = enumerate(zip(ensemble_pred, y), start=1)
for i, (prediction, actual) in numbered_pairs:
    print(f"Data Point {i}: Predicted Price: {prediction:.2f}, Actual Price: {actual:.2f}")

In [None]:
import pickle

# Persist the dataset and the fitted decision tree to disk.
# pickle.dump takes the object and an open binary file as two separate
# arguments; the `with` blocks make sure each file is flushed and closed.
with open('df.pkl', 'wb') as df_file:
    pickle.dump(df, df_file)

with open('pipe.pkl', 'wb') as model_file:
    pickle.dump(dt, model_file)

In [None]:
# Stray bare name `b` removed: no cell above defines it, so evaluating it
# raised NameError and stopped Restart & Run All at the very last cell.