In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import metrics

# Model Training Method --
from xgboost import XGBRegressor

- Import the dataset


In [None]:
# Column headers, in file order. The file is read with header=None, so these
# names are supplied explicitly instead of being taken from a header row.
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
                'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'Price']

# Load the housing-price data; fields are separated by runs of whitespace.
df = pd.read_csv('../DataSet_Collection/houseprice.csv',
                 header=None, delimiter=r"\s+", names=column_names)

# Show the first 5 rows. Only the LAST expression of a cell is rendered
# automatically, so a bare `df.head()` in the middle of the cell would be
# evaluated and silently discarded -- display it explicitly instead.
display(df.head())

# (rows, columns) of the loaded frame -- last expression, shown as cell output.
df.shape

- Data Analysis


In [None]:
# Descriptive statistics for the columns of the dataset
summary_stats = df.describe()
summary_stats

In [None]:
# Count the missing (NaN) entries in each column
df.isna().sum()

In [None]:
# Pairwise correlation between the columns of the dataset
correlation = df.corr()

# Annotated heat map of the correlation matrix
plt.figure(figsize=(10, 10))
# fmt='.1f' prints each annotation with one decimal place. The leading dot
# matters: '1f' means "minimum width 1, default precision" and would write
# six decimals into every cell, making the map unreadable.
sns.heatmap(correlation, cbar=True, square=True, fmt='.1f',
            annot=True, annot_kws={'size': 8}, cmap='Greens')
# Render the figure explicitly so the cell does not echo the Axes repr.
plt.show()

- # Split the data into features and target (outcome)


In [None]:
# Target: the 'Price' column
Y = df['Price']
# Features: every remaining column
X = df.drop(columns=['Price'])

- # Split into training data and test data


In [None]:
# Hold out 20% of the rows for testing; random_state is fixed at 1
split_parts = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# Shapes of the full feature matrix, the training part and the test part
print(*(frame.shape for frame in (X, X_train, X_test)))

# Create model and train Model


In [None]:
# Instantiate an XGBoost regressor with its default settings.
# Nothing is loaded from disk here; the model is still untrained at this point.
model = XGBRegressor()

In [None]:
# Fit the model on the training split only (X_train / Y_train);
# X_test / Y_test are not passed to fit here.
model.fit(X_train, Y_train)

# Model Evaluation


In [None]:
# Predict on the training split, i.e. on the data the model was fitted to
trainData_prediction = model.predict(X_train)

# Two metrics are reported:
# R squared (coefficient of determination) -- a score, not an error
scoreTrain_1 = metrics.r2_score(Y_train, trainData_prediction)

# Mean absolute error between the actual and the predicted prices
scoreTrain_2 = metrics.mean_absolute_error(Y_train, trainData_prediction)

# Name the split in the output and label R squared as a score, so these
# numbers cannot be mistaken for the test-split results or read as an error.
print("Train R squared score :", scoreTrain_1)
print("Train Mean Absolute Error :", scoreTrain_2)

In [None]:
# Predict on the held-out test split
testData_prediction = model.predict(X_test)

# Two metrics are reported:
# R squared (coefficient of determination) -- a score, not an error
scoreTest_1 = metrics.r2_score(Y_test, testData_prediction)

# Mean absolute error between the actual and the predicted prices
scoreTest_2 = metrics.mean_absolute_error(Y_test, testData_prediction)

# Name the split in the output and label R squared as a score, so these
# numbers cannot be mistaken for the training-split results or read as an error.
print("Test R squared score :", scoreTest_1)
print("Test Mean Absolute Error :", scoreTest_2)

# Visualizing the actual vs. predicted prices


In [None]:
# Scatter of actual prices (x) against the model's predictions (y)
# for the training split
plt.title("Actual Price Vs Prediction Price")
plt.xlabel('Actual price')
plt.ylabel('Prediction Price')
plt.scatter(x=Y_train, y=trainData_prediction)
plt.show()