## Chapitre 5: Descriptive Statistics

In [None]:
# Load the training set and preview its first 20 rows
from pandas import read_csv
data = read_csv('../HousePrices/trainfull.csv')
# Positional slice of the leading rows (same rows that head(20) returns)
peek = data.iloc[:20]
print(peek)

In [None]:
# Size of the dataset as a (row count, column count) pair
shape = (len(data.index), len(data.columns))
print(shape)

In [None]:
# Data Types for Each Attribute
# `data.dtypes` is a Series indexed by column name holding the dtype pandas
# inferred for each column when the CSV was read.
types = data.dtypes
print(types)

In [None]:
# Statistical Summary
from pandas import set_option
set_option('display.width', 100)
# Use the fully-qualified option key. Option names are matched as patterns, and
# since pandas 1.4 the bare 'precision' also matches 'styler.format.precision',
# so set_option('precision', ...) raises OptionError on current pandas.
set_option('display.precision', 3)
# describe() summarises each column it can aggregate (count, mean, std, min,
# quartiles, max).
description = data.describe()
print(description)

In [None]:
# Pairwise Pearson correlations
# (`set_option` is the name imported in the statistical-summary cell.)
set_option('display.width', 100)
# Fully-qualified key: the bare 'precision' pattern is ambiguous on pandas >= 1.4
# and raises OptionError.
set_option('display.precision', 3)
# `data` still contains text columns at this point. pandas >= 2.0 no longer drops
# them silently inside corr() and raises instead, so pick the numeric columns
# explicitly; this form also works on older pandas releases.
correlations = data.select_dtypes(include='number').corr(method='pearson')
print(correlations)

In [None]:
# Skew for each attribute
# numeric_only=True restricts the computation to numeric columns. Without it,
# pandas >= 2.0 raises on the text columns that `data` still holds here, whereas
# older releases skipped them silently.
skew = data.skew(numeric_only=True)
print(skew)

## Chapitre 6: Visualisation

In [None]:
# Univariate Histograms
from matplotlib import pyplot
import matplotlib
%matplotlib inline
matplotlib.rc('figure', figsize=(20, 20))
figure = data.hist()
pyplot.show()

In [None]:
# Univariate density plots: one subplot per column on a 10x4 grid, each with
# its own x-axis
matplotlib.rcParams['figure.figsize'] = (20, 20)
figure = data.plot.density(subplots=True, layout=(10, 4), sharex=False)
pyplot.show()

In [None]:
# Box-and-whisker plots: one subplot per column on a 10x4 grid, with
# independent x and y axes so each column keeps its own scale
matplotlib.rcParams['figure.figsize'] = (20, 20)
figure = data.plot.box(subplots=True, layout=(10, 4), sharex=False, sharey=False)
pyplot.show()

In [None]:
# Correlation Matrix Plot
import numpy as np
# Select numeric columns explicitly: pandas >= 2.0 raises in corr() when the
# frame still contains text columns instead of skipping them.
correlations = data.select_dtypes(include='number').corr()
# plot correlation matrix
fig = pyplot.figure(figsize=(25, 25))
ax = fig.add_subplot(111)
# Pin the colour scale to the full correlation range [-1, 1]
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)
# One tick per row/column of the matrix. The tick count was previously
# hard-coded to 37, which leaves the last column(s) unticked whenever the
# matrix is larger than that.
ticks = np.arange(len(correlations.columns))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
# Label the ticks with the column names so each cell can be identified
ax.set_xticklabels(correlations.columns, rotation=90)
ax.set_yticklabels(correlations.columns)
pyplot.show()

In [None]:
# Scatterplot matrix of the dataset on a 40x40 inch canvas
from pandas.plotting import scatter_matrix
matplotlib.rcParams['figure.figsize'] = (40, 40)
figure = scatter_matrix(frame=data)
pyplot.show()

## Chapitre 7: Prepare Your Data For Machine Learning

In [None]:
# Keep only the non-text columns; every column with dtype `object` is dropped
data = data.select_dtypes(exclude='object')

In [None]:
# Replace every missing value with 0. Rebinding the result instead of using
# inplace=True follows pandas guidance (inplace brings no performance benefit)
# and avoids mutating a frame object that other names may still reference.
data = data.fillna(0)

In [None]:
# Rescale data (between 0 and 1)
from numpy import set_printoptions
from sklearn.preprocessing import MinMaxScaler
array = data.values
# separate array into input and output components:
# column 37 is the output, columns 0..36 are the inputs.
# The slice stop is exclusive, so it must be 37; the earlier `0:36` ended one
# column short and silently left column 36 out of X.
# NOTE(review): assumes column 36 is a genuine input feature - confirm it was
# not being excluded on purpose.
X = array[:, 0:37]
Y = array[:, 37]

scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX = scaler.fit_transform(X)
# summarize transformed data
set_printoptions(precision=3)
print(rescaledX[0:5,:])

In [None]:
# Standardize every input column to zero mean and unit standard deviation
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the per-column statistics and apply them in a single step
rescaledX = scaler.fit_transform(X)
# Show the first five transformed rows with 3-digit precision
set_printoptions(precision=3)
print(rescaledX[:5])

In [None]:
# Normalize each row (sample) to a vector of length 1
from sklearn.preprocessing import Normalizer
scaler = Normalizer()
normalizedX = scaler.fit_transform(X)
# Show the first five transformed rows with 3-digit precision
set_printoptions(precision=3)
print(normalizedX[:5])

In [None]:
# Binarization: values above the threshold (0.0) become 1, all others 0
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=0.0)
binaryX = binarizer.fit_transform(X)
# Show the first five transformed rows with 3-digit precision
set_printoptions(precision=3)
print(binaryX[:5])

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# Toy regression data: one feature laid out as a column vector (6 rows, 1 column)
# and the matching response values. Note this rebinds `X`.
X = np.array([[5], [15], [25], [35], [45], [55]])
y = np.array([5, 20, 14, 32, 22, 38])
print(X, y, sep='\n')

In [None]:
# Ordinary least-squares fit of y on X; `model` holds the fitted estimator
model = LinearRegression()
model.fit(X, y)

In [None]:
# Score the fitted model on the same data it was trained on; as the last
# expression of the cell, the value is shown as the cell output.
model.score(X, y)

In [None]:
# Keep the training-set score (coefficient of determination, per the printed
# label) in `r_sq` and print it.
r_sq = model.score(X, y)
print('coefficient de détermination:', r_sq)

In [None]:
 y_pred = model.predict(X)
print('réponse prévue:', y_pred)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.scatter(X,y)
plt.plot()
plt.show()