In [None]:
import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy

# Load the competition's sample submission to see the expected output format.
sample_submission = pd.read_csv(
    '/kaggle/input/house-prices-advanced-regression-techniques/sample_submission.csv'
)
display(sample_submission)

### Reading the data

In [None]:
# Load the training set, then report its dimensions and preview the first rows.
train_df = pd.read_csv(
    '/kaggle/input/house-prices-advanced-regression-techniques/train.csv'
)
print(train_df.shape)
display(train_df.head())

## EDA (Exploratory Data Analysis)

In [None]:
# Heatmap of pairwise correlations between the numeric columns.
# The frame also holds string columns, and DataFrame.corr() raises on those in
# pandas >= 2.0 (numeric_only defaults to False there), so keep only numeric
# dtypes before computing the correlation matrix.
plt.figure(figsize=[15, 12])
sns.heatmap(train_df.select_dtypes(include='number').corr())
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation (fixed seed so the split is repeatable).
# The hold-out frame must be named `x_test`: the target extraction below and the
# later pipeline-transform cell both read that name.
X_train, x_test = train_test_split(train_df, test_size=0.2, random_state=42)

# The target is the last column of each frame. Selecting the column first and
# converting it afterwards keeps its numeric dtype; converting the whole
# mixed-type frame with to_numpy() would yield an object-dtype array instead.
YY = train_df.iloc[:, -1].to_numpy()  # targets for the full training set
Y = X_train.iloc[:, -1].to_numpy()    # targets for the 80% training split
y = x_test.iloc[:, -1].to_numpy()     # targets for the 20% hold-out split

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Dividing into 80/20


### Selecting the Feature Subset

In [None]:
# Keep only the features used for modelling: the categorical columns first,
# followed by the numeric ones.
train_df_sub = X_train[[
    'LotShape', 'SaleCondition', 'LotConfig', 'Neighborhood',
    'BldgType', 'HouseStyle', 'RoofStyle', 'MasVnrType',
    'Foundation', 'Electrical', 'Heating', 'GarageType',
    'LotArea', 'MSSubClass', 'OverallQual', 'GrLivArea',
    'KitchenAbvGr', 'EnclosedPorch', 'GarageCars', 'GarageArea',
    'MiscVal', 'MoSold', 'YearBuilt', 'YrSold',
    'FullBath', 'HalfBath', 'Fireplaces',
]]
print(train_df_sub.shape)

### Dealing with Categorical Values

In [None]:
# Categorical slice of the selected features. No imputation is applied here.
train_df_cat = train_df_sub[[
    'LotShape', 'SaleCondition', 'LotConfig', 'Neighborhood',
    'BldgType', 'HouseStyle', 'RoofStyle', 'MasVnrType',
    'Foundation', 'Electrical', 'Heating', 'GarageType',
]]
train_df_cat.shape

### Numeric Values

In [None]:
# Numeric slice of the selected features, deep-copied so it is independent of
# `train_df_sub`.
train_df_nu = deepcopy(train_df_sub[[
    'LotArea', 'MSSubClass', 'OverallQual', 'GrLivArea',
    'KitchenAbvGr', 'EnclosedPorch', 'GarageCars', 'GarageArea',
    'MiscVal', 'MoSold', 'YearBuilt', 'YrSold',
    'FullBath', 'HalfBath', 'Fireplaces',
]])
train_df_nu.shape

### Pipelines

In [None]:
# Column names routed to the categorical branch of the preprocessing pipeline.
cat_attribs = [
    'LotShape', 'SaleCondition', 'LotConfig', 'Neighborhood',
    'BldgType', 'HouseStyle', 'RoofStyle', 'MasVnrType',
    'Foundation', 'Electrical', 'Heating', 'GarageType',
]
# Column names routed to the numeric branch (the labels of `train_df_nu`).
train_df_num = train_df_nu.columns.tolist()


### Custom Transformers

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
# from sklearn.base import BaseEstimator, TransformerMixin

In [None]:
# Numeric branch: fill missing values with the column median, then standardise.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('std_scaler', StandardScaler()),
])

# Full preprocessing: numeric columns go through `num_pipeline`, categorical
# columns are one-hot encoded (categories unseen during fit are ignored at
# transform time), and every other column is dropped.
full_pipeline = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, train_df_num),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_attribs),
    ],
    remainder='drop',
)

# Fit the preprocessing on the training split and transform it in one pass.
train_df_prepared = full_pipeline.fit_transform(X=X_train)

print(train_df_prepared.shape)

## Fitting the Model

In [None]:
# Apply the already-fitted preprocessing to the hold-out split (no refit).
X_test = full_pipeline.transform(X=x_test)
print(X_test.shape)

### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline, fitted on the prepared training split.
linear = LinearRegression()
linear.fit(X=train_df_prepared, y=Y)

### Linear Prediction

In [None]:
# Predict targets for the hold-out split with the linear model.
linear_prediction = linear.predict(X=X_test)

### Calculating Losses for Linear Model

#### Useful function

In [None]:
def display_score(score):
    """Print the mean and the standard deviation of an array of scores.

    `score` must provide `.mean()` and `.std()` methods (e.g. a NumPy array).
    """
    average = score.mean()
    spread = score.std()
    print('mean Score: ', average)
    print('std: ', spread)

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

# RMSE of the linear model on the hold-out split.
linear_loss = mean_squared_error(y_true=y, y_pred=linear_prediction)
print(np.sqrt(linear_loss))

# 10-fold cross-validation on the training split; the scorer returns negated
# MSE, so flip the sign before taking the square root to get per-fold RMSE.
linear_scores = cross_val_score(
    estimator=linear,
    X=train_df_prepared,
    y=Y,
    scoring='neg_mean_squared_error',
    cv=10,
)
score = np.sqrt(-linear_scores)
display_score(score)



### Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor fitted on the prepared training split.
# A fixed random_state makes the bootstrap samples and feature choices
# repeatable, so the reported scores and the final submission can be
# reproduced on a re-run (same seed as the train/validation split).
forest = RandomForestRegressor(random_state=42)
forest.fit(train_df_prepared, Y)

In [None]:
# Predict targets for the hold-out split with the random forest.
forest_prediction = forest.predict(X=X_test)

### Calculating Losses for Random Forest Model

In [None]:
# RMSE of the random forest on the hold-out split.
forest_loss = mean_squared_error(y_true=y, y_pred=forest_prediction)
print(np.sqrt(forest_loss))

# 10-fold cross-validation on the training split; the scorer returns negated
# MSE, so flip the sign before taking the square root to get per-fold RMSE.
forest_scores = cross_val_score(
    estimator=forest,
    X=train_df_prepared,
    y=Y,
    scoring='neg_mean_squared_error',
    cv=10,
)
score = np.sqrt(-forest_scores)
display_score(score)

### Making the Predictions

In [None]:
# Load the competition test set and preview its first rows.
test_df = pd.read_csv(
    '/kaggle/input/house-prices-advanced-regression-techniques/test.csv'
)
display(test_df.head())

### Preparing the Submission

In [None]:
# A last cross-validation: 10-fold RMSE of the forest on the full training set.
# The prepared features are computed in this cell because it runs before the
# "Preprocessing" cell that also assigns `final_train_df_prepared`; without
# this line the name is undefined on a fresh Restart & Run All.
final_train_df_prepared = full_pipeline.transform(train_df)
forest_scores_final = cross_val_score(forest, final_train_df_prepared, YY, scoring='neg_mean_squared_error', cv=10)
# The scorer returns negated MSE, so flip the sign before the square root.
score = np.sqrt(-forest_scores_final)
display_score(score)

In [None]:
# Run the full training set through the already-fitted preprocessing pipeline.
final_train_df_prepared = full_pipeline.transform(X=train_df)
final_train_df_prepared.shape

In [None]:
# Refit the forest on every training row before predicting the test set.
forest.fit(X=final_train_df_prepared, y=YY)

In [None]:
# Preprocess the test set with the pipeline fitted on the training data.
# This must be transform(), not fit_transform(): refitting on the test set
# would re-learn the imputation medians, the scaling statistics and the
# one-hot categories, so the resulting columns would no longer correspond to
# the features `forest` was trained on.
final_test = full_pipeline.transform(test_df)
final_test.shape

In [None]:
# Predict the target for every row of the prepared test set.
prediction = forest.predict(X=final_test)

In [None]:
# Build the submission frame (test-set Id plus predicted SalePrice) and write
# it to CSV without the index column.
output = pd.DataFrame({
    'Id': test_df['Id'],
    'SalePrice': prediction,
})
output.to_csv('submint_forest.csv', index=False)
print("Successfull!")