# Linear regression




In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Build a synthetic sample of m points: X is evenly spaced on [0, 10] as a
# single column, and y is X plus standard normal noise.
m = 180
X = np.linspace(0, 10, m).reshape(-1, 1)
noise = np.random.randn(m, 1)
y = X + noise

# Scatter plot of the noisy, linearly correlated sample
plt.scatter(X, y)
plt.xlabel("Data linearly correlated")
plt.show()

In [None]:
# Build the linear regression model and train it on the sample
# (fit returns the fitted estimator itself)
model = LinearRegression().fit(X, y)

# score() is evaluated on the same data the model was trained on
print("model score :", model.score(X, y))

# Draw the fitted regression line in red on top of the sample
fitted_values = model.predict(X)
plt.scatter(X, y)
plt.plot(X, fitted_values, c='red')
plt.xlabel("regression line ")
plt.show()

# Polynomial regression





In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# Observations to fit (x and y have the same length)
x = [1,2,3,5,6,7,8,9,10,12,13,14,15,16,18,19,21,22]
y = [99,95,70,55,55,50,60,75,80,70,85,76,78,75,93,98,98,103]

# Least-squares fit of a degree-3 polynomial, wrapped as a callable
coefficients = np.polyfit(x, y, 3)
model = np.poly1d(coefficients)

# Evaluate the fitted curve on 100 evenly spaced points from 1 to 22
line = np.linspace(1, 22, 100)
fitted_curve = model(line)
plt.scatter(x, y)
plt.plot(line, fitted_curve)
plt.show()

# Model score: R² between the observations and the fitted values
print(" ")
fitted_at_x = model(x)
print("model score is :", r2_score(y, fitted_at_x))

# K-means clustering

In [None]:
# Import libraries
from sklearn.cluster import KMeans
from sklearn import datasets
# This library helps to process data
from sklearn.preprocessing import scale

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Raw measurements as a DataFrame (columns are labelled 0, 1, 2, ...)
df = pd.DataFrame(iris.data)

# Standardized copy of the measurements
X = scale(iris.data)

# Flower types (class labels) and the names of the measured features
y = pd.DataFrame(iris.target)
variable_names = iris.feature_names

print("first 10 values : \n", X[:10])

In [None]:
# Elbow method: fit KMeans for k = 1..9 on df and record the inertia
# (within-cluster sum of squares) obtained for each k
K = range(1, 10)
inertia_intraclass = []
for k in K:
    # fit returns the fitted estimator, so it can be chained
    kmeanModel = KMeans(n_clusters=k).fit(df)
    inertia_intraclass.append(kmeanModel.inertia_)

In [None]:
# Plot how the intraclass inertia evolves as the number of clusters grows
# (the "elbow" of the curve suggests a suitable number of clusters).
# The figure is created explicitly so the plot is drawn at 8x8 inches.
plt.figure(figsize=(8, 8))
plt.plot(K, inertia_intraclass, 'bx-')
plt.xlabel('Number of clusters K')
plt.ylabel('Inertia intraclass')
plt.title('Kmeans method shows the optimal numbers of clusters K \n')
plt.show()

In [None]:
# Create a KMeans object configured for 3 clusters
k_average = KMeans(n_clusters=3)

# Use only the original measurement columns as features. This cell adds
# 'k_means' and 'target' columns to df below; selecting the first columns
# explicitly keeps those labels out of the model if the cell is run again.
n_features = iris['data'].shape[1]
features = df.iloc[:, :n_features]

# Train the model
k_average.fit(features)

# Apply the prediction and keep the true flower type next to it
df['k_means'] = k_average.predict(features)
df['target'] = iris['target']

# Visualize clusters by color (green, yellow, red), using the first two
# measurement columns as plot axes
color_theme = np.array(['green', 'yellow', 'red'])
plt.figure(figsize=(8, 8))
plt.scatter(df[0], df[1], c=color_theme[k_average.labels_], s=50)
plt.show()

# Principal Component Analysis (PCA)

In [None]:
from sklearn import decomposition
from sklearn.decomposition import PCA
from sklearn import datasets

In [None]:
# Load the Iris measurements and keep the feature names at hand
iris = datasets.load_iris()
X = iris['data']
variable_names = iris['feature_names']
print(" First 10 lines :\n", X[:10])

# PCA with default settings
pca = decomposition.PCA()

# Fit the PCA on X and project X onto the principal components
iris_pca = pca.fit_transform(X)

# Share of the total variance carried by each principal component
print(
    "Percentage of variance explained by each four principal components :\n",
    pca.explained_variance_ratio_,
)


# Model choice

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Small hand-written sample of 5 observations.
# NOTE(review): judging by the message printed below, X looks like prices
# in € and y like apartment surfaces in m2 — confirm with the data source.
X = np.array([740, 880, 650, 1900, 1430])
y = np.array([25, 35, 19, 103, 67])

# scikit-learn expects a 2-D feature matrix, so reshape X into one column once
X_column = X.reshape(-1, 1)

# Build and train the regression model
model = LinearRegression()
model.fit(X_column, y)

print("score is :", model.score(X_column, y))

# Predict y for a single input value of 1000 (shaped as a 1x1 matrix)
budget = np.array(1000).reshape(-1, 1)
print("With 1000 € you can pretend to an appartment of ", model.predict(budget), "m2")

# Observations and the fitted regression line (in red)
plt.scatter(X, y)
plt.plot(X_column, model.predict(X_column), c='red')
plt.show()