In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ai_project.models.support_vector_regression.svr import SVR
from ai_project.common.data_manipulation.prepare_data import prepare_data
import pandas as pd
import os

# Read the processed house-price table from the project's data directory.
path = os.path.join('ai_project', 'data', 'processed', 'house_price.csv')
data = pd.read_csv(path)

# Features: every column after the first, passed through the project's
# prepare_data helper. Target: the SalePrice column.
# NOTE(review): if SalePrice is not the first column of the CSV it is also
# part of the feature matrix (target leakage) - confirm the column layout.
x = prepare_data(data.iloc[:, 1:].to_numpy())
y = data['SalePrice'].to_numpy()

# The final 100 rows are held out for testing; all earlier rows are used
# for training.
x_train, x_test = x[:-100, :], x[-100:, :]
y_train, y_test = y[:-100], y[-100:]


In [None]:
# Summary statistics of the house-price frame, rounded to whole numbers
# so the table is easier to scan.
data.describe().round()

In [None]:
# Fit the project's SVR on the house-price training rows and evaluate it
# on the held-out rows.
model = SVR(epsilon=10000, loss_function='linear', kernel='linear')
model.fit(x_train, y_train, n_iters=500, learning_rate=2000)

# predict is called one sample at a time, each wrapped as a single-row
# batch; [0][0] pulls the scalar out of the returned nested result.
predicted = [model.predict(np.array([sample]))[0][0] for sample in x_test]
df = pd.DataFrame({'Actual': y_test, 'Predicted': predicted})

# Compute the error metrics BEFORE rounding: rounding is only meant for
# the displayed table and would otherwise distort RMSE / MAE.
rmse = ((df['Predicted'] - df['Actual']) ** 2).mean() ** 0.5
mae = ((df['Predicted'] - df['Actual']).abs()).mean()

# Rounded copy for display, plus the per-row signed error.
df = df.round()
df['Substract'] = df['Predicted'] - df['Actual']

# A bare `df.head(10)` in the middle of a cell is never displayed, so the
# preview is printed explicitly.
print(df.head(10))

print(f'\n\nRMSE: {rmse:.2f}')
print(f'MAE: {mae:.2f}')

df.plot()

In [None]:
# Read the processed life-expectancy table into `df`.
path = os.path.join(
    'ai_project',
    'data',
    'processed',
    'life_expectancy_data.csv',
)
df = pd.read_csv(path)

In [None]:
# Summary statistics of the life-expectancy frame.
df.describe()

In [None]:
import seaborn as sns

# Pairwise correlation between the columns of the life-expectancy frame,
# drawn as a square heatmap with the colour scale capped at 0.8.
# NOTE(review): recent pandas versions raise in corr() when the frame has
# non-numeric columns - confirm the processed CSV is all-numeric.
corrmat = df.corr()
f, ax = plt.subplots(figsize=(14, 14))
sns.heatmap(corrmat, vmax=0.8, square=True, ax=ax)

In [None]:
# Annotated correlation heatmap limited to the k columns whose correlation
# with 'Life expectancy ' is largest (the target itself is among them).
k = 10  # number of columns shown in the heatmap
cols = corrmat.nlargest(k, 'Life expectancy ')['Life expectancy '].index
f, ax = plt.subplots(figsize=(10, 10))

# Columns of the selected sub-frame are the variables being correlated.
cm = np.corrcoef(df[cols].to_numpy(), rowvar=False)

sns.set(font_scale=1.25)
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    annot_kws={'size': 10},
    yticklabels=cols.values,
    xticklabels=cols.values,
)

In [None]:
# Show the column labels selected for the reduced correlation heatmap.
cols

In [None]:
# Keep only the selected columns; note that this rebinds `data`, which
# previously held the house-price frame.
data = df.loc[:, cols]

In [None]:
# Summary statistics of the reduced life-expectancy frame.
data.describe()

In [None]:
# Distribution plot of the target column (the trailing space in the
# column name is part of the label).
sns.displot(data["Life expectancy "])

In [None]:
# Reduce `data` to the target plus a single predictor (target first, so
# later cells can take "all columns after the first" as features) and
# plot the predictor against the target.
x_label = 'Income composition of resources'
data = data[['Life expectancy ', x_label]]
data.plot(kind='scatter', x=x_label, y='Life expectancy ', ylim=(0, 100))

In [None]:
# Display the two-column frame (target and the chosen predictor).
data

In [None]:
# Features: every column after the first, passed through the project's
# prepare_data helper. Target: the 'Life expectancy ' column.
x = prepare_data(data.iloc[:, 1:].to_numpy())
y = data['Life expectancy '].to_numpy()

# The final 100 rows are held out for testing; all earlier rows are used
# for training.
x_train, x_test = x[:-100, :], x[-100:, :]
y_train, y_test = y[:-100], y[-100:]

In [None]:
from sklearn.model_selection import train_test_split

# Random 90/10 split of the prepared features and target. The seed is
# fixed so the split - and every metric computed from it - is the same on
# each run of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42
)

In [None]:
# Fit the project's SVR on the life-expectancy training rows and evaluate
# it on the held-out rows.
model = SVR(epsilon=11, loss_function='linear', kernel='quadratic')
model.fit(x_train, y_train, n_iters=500, learning_rate=0.5)

# predict is called one sample at a time, each wrapped as a single-row
# batch; [0][0] pulls the scalar out of the returned nested result.
predicted = [model.predict(np.array([sample]))[0][0] for sample in x_test]

# NOTE(review): this rebinds `df`, replacing the life-expectancy frame
# loaded earlier; re-run the loading cell before re-running cells that
# expect that frame.
df = pd.DataFrame({'Actual': y_test, 'Predicted': predicted})

# Compute the error metrics BEFORE rounding: rounding is only meant for
# the displayed table and would otherwise distort RMSE / MAE.
rmse = ((df['Predicted'] - df['Actual']) ** 2).mean() ** 0.5
mae = ((df['Predicted'] - df['Actual']).abs()).mean()

# Rounded copy for display, plus the per-row signed error.
df = df.round()
df['Substract'] = df['Predicted'] - df['Actual']

# A bare `df.head(10)` in the middle of a cell is never displayed, so the
# preview is printed explicitly.
print(df.head(10))

print(f'\n\nRMSE: {rmse:.2f}')
print(f'MAE: {mae:.2f}')

df.plot()

In [None]:
# Load the processed house-price table again; `data` was rebound to the
# life-expectancy subset in the cells above.
path = os.path.join(
    'ai_project',
    'data',
    'processed',
    'house_price.csv',
)
data = pd.read_csv(path)

In [None]:
from sklearn.linear_model import LinearRegression

# train_test_split() needs at least one array and raises ValueError when
# called with none, so the features and target are rebuilt from the
# reloaded house-price frame (same recipe as the first cell) and passed in.
# NOTE(review): the 90/10 ratio mirrors the earlier random split - confirm
# this is the intended setup for the LinearRegression baseline.
x = prepare_data(data.iloc[:, 1:].to_numpy())
y = data['SalePrice'].to_numpy()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42
)