# Importing the Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Load the ice-cream dataset directly from GitHub (requires network access).
ICE_CREAM_CSV_URL = 'https://raw.githubusercontent.com/mk-gurucharan/Regression/master/IceCreamData.csv'
df = pd.read_csv(ICE_CREAM_CSV_URL)

In [None]:
# Preview the first rows of the ice-cream data.
df.head()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Total number of cells (rows * columns).
df.size

# Preparing data for model

In [None]:
# Feature (Temperature) and target (Revenue) as plain 1-D NumPy arrays.
x, y = (np.array(df[column]) for column in ('Temperature', 'Revenue'))

Splitting the dataset into training and test sets

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Random forest with 100 trees, depth capped at 5, and a fixed seed.
forest_params = dict(n_estimators=100, max_depth=5, random_state=0, bootstrap=True)
model = RandomForestRegressor(**forest_params)

In [None]:
# The single feature is reshaped into one column because the estimator
# takes a 2-D feature matrix.
train_features = x_train.reshape(-1, 1)
model.fit(train_features, y_train.ravel())

In [None]:
# Predict revenue for the held-out temperatures (reshaped to one column).
test_features = x_test.reshape(-1, 1)
y_pred = model.predict(test_features)

In [None]:
# Side-by-side table of true and predicted revenue for the test rows.
pred = pd.DataFrame({
    'Actual': y_test.reshape(-1),
    'Predicted': y_pred.reshape(-1),
})
pred.head()

In [None]:
# Actual (blue) vs. predicted (red) revenue over the test temperatures.
# The series are labelled so the legend tells them apart.
plt.scatter(x_test, y_test, color='blue', label='Actual')
plt.scatter(x_test, y_pred, color='red', label='Predicted')
# NOTE(review): the axis previously said Kelvin; the dataset's temperatures
# appear to be in degrees Celsius -- confirm against the data source.
plt.xlabel('Temperature (°C)')
plt.ylabel('Revenue')
plt.legend()

In [None]:
# Annotated correlation heatmap between the Actual and Predicted columns.
pred_corr = pred.corr()
sns.heatmap(pred_corr, annot=True)

In [None]:
# Box plot of the values in df's correlation matrix.
# NOTE(review): this summarises correlation coefficients, not the raw columns;
# confirm whether sns.boxplot(data=df) or sns.heatmap(df.corr(), annot=True)
# was intended.
sns.boxplot(df.corr())

In [None]:
# Score the fitted model on the held-out set (features reshaped to one column).
eval_features = x_test.reshape(-1, 1)
eval_targets = y_test.ravel()
model.score(eval_features, eval_targets)

In [None]:
# R^2 of the predictions against the true test revenue.
r2_score(y_true=y_test, y_pred=y_pred)

# Car price prediction

In [None]:
# Load the used-car dataset directly from GitHub (requires network access).
CAR_CSV_URL = 'https://raw.githubusercontent.com/sahilrahman12/Price_prediction_of_used_Cars_-Predictive_Analysis-/master/cardekho_data.csv'
car_df = pd.read_csv(CAR_CSV_URL)

In [None]:
# Preview the first rows of the car data.
car_df.head()

In [None]:
# (number of rows, number of columns)
car_df.shape

In [None]:
# Column names, dtypes and non-null counts.
car_df.info()

In [None]:
# Derive each car's age as the gap between the reference year and 'Year'.
reference_year = 2024
car_df['no_of_years'] = reference_year - car_df['Year']
car_df.head()

In [None]:
# Remove the 'Year' and 'Car_Name' columns in place.
unused_columns = ['Year', 'Car_Name']
car_df.drop(columns=unused_columns, inplace=True)
car_df.head()

In [None]:
# One-hot encode the car data into indicator (dummy) columns.
new_df = pd.get_dummies(data=car_df)
new_df.head()

In [None]:
# Cast the one-hot indicator columns to integers.
# ``astype`` returns a new object instead of modifying the frame, so the
# result must be assigned back; the bare calls used previously had no effect.
indicator_columns = [
    'Fuel_Type_CNG',
    'Fuel_Type_Diesel',
    'Fuel_Type_Petrol',
    'Transmission_Automatic',
    'Transmission_Manual',
    'Seller_Type_Dealer',
    'Seller_Type_Individual',
]
new_df = new_df.astype({column: int for column in indicator_columns})
new_df.head()

In [None]:
# Drop one indicator column from each one-hot group (fuel type, seller type,
# transmission).
baseline_dummies = ['Fuel_Type_CNG', 'Seller_Type_Dealer', 'Transmission_Automatic']
new_df.drop(columns=baseline_dummies, inplace=True)
new_df.head()

In [None]:
# Pairwise relationship grid over every column of the encoded frame.
sns.pairplot(data=new_df)

In [None]:
# Annotated correlation heatmap over the encoded car features.
car_corr = new_df.corr()
sns.heatmap(car_corr, annot=True)

In [None]:
# The first column is the target; every remaining column is a feature.
target_column = new_df.columns[0]
y = new_df[target_column]
x = new_df.drop(columns=target_column)

# Feature Selection (feature importance)

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

In [None]:
# Fit an extra-trees ensemble on all rows; its feature importances are read
# in the following cells.
importance_params = {'n_estimators': 100, 'random_state': 0}
model = ExtraTreesRegressor(**importance_params)
model.fit(x, y)

In [None]:
# Raw importance scores, one per column of x (same order as x.columns).
model.feature_importances_

In [None]:
# Label the importances with their column names and chart the ten largest.
imp = pd.Series(data=model.feature_importances_, index=x.columns)
top_features = imp.nlargest(10)
top_features.plot.barh()

# splitting the data

In [None]:
# Hold out 20% of the cars for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Display the held-out feature rows.
x_test

In [None]:
# Base estimator for the hyperparameter search.  Seeded like the other
# estimators and splits in this notebook so repeated runs give the same result.
regression = RandomForestRegressor(random_state=0)

# Hyperparameter Tuning

In [None]:
# Candidate values sampled by the randomized hyperparameter search.

# Number of trees: 100, 200, ..., 1200.
n_estimators = list(range(100, 1201, 100))
# Features considered at each split.  1.0 means "all features", which is what
# the removed 'auto' option meant for regressors; 'auto' itself is rejected
# by scikit-learn >= 1.3.
max_features = [1.0, 'sqrt']
# Maximum tree depth: 5, 10, ..., 30.
max_depth = list(range(5, 31, 5))
# Minimum samples needed to split an internal node.
min_samples_split = [2, 5, 10]
# Minimum samples required at a leaf.
min_samples_leaf = [1, 2, 4]
# Whether each tree is trained on a bootstrap sample.
bootstrap = [True, False]

random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Randomized search over random_grid with 5-fold cross-validation, scored by
# negative mean squared error.  random_state fixes which candidates are drawn.
search_options = {
    'estimator': regression,
    'param_distributions': random_grid,
    'cv': 5,
    'random_state': 42,
    'n_jobs': 1,
    'scoring': 'neg_mean_squared_error',
    'verbose': 2,
}
final_regressor = RandomizedSearchCV(**search_options)

In [None]:
# Run the hyperparameter search on the training split only.
final_regressor.fit(x_train, y=y_train)

In [None]:
# Hyperparameter combination that scored best during the search.
final_regressor.best_params_

In [None]:
# Predict the target for the held-out cars with the fitted search object.
y_pred = final_regressor.predict(X=x_test)

In [None]:
# True (x-axis) vs. predicted (y-axis) target values for the test set.
plt.scatter(x=y_test, y=y_pred)

In [None]:
# R^2 of the tuned model's predictions on the held-out cars.
r2_score(y_true=y_test, y_pred=y_pred)