In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Seed NumPy's global RNG so the generated numbers can be reproduced
np.random.seed(1998)
# Size of the dataset
n = 100
# Draw the n inputs in a single vectorized call (same seeded stream as
# drawing them one at a time)
x = np.random.random(n)
# Gaussian noise with mean 0 and standard deviation 1/3; drawn after x so the
# order of RNG draws is unchanged
noise = np.random.normal(0, 1/3, n)
# True model y = -pi * x + e, plus noise. Everything stays an ndarray, so no
# list-vs-array `+=` dispatch is relied upon.
y = -np.pi * x + np.e + noise
df = pd.DataFrame({'X': x, 'Y': y})
df.head()

In [None]:
# Visualise the raw (x, y) pairs before fitting anything
fig, ax = plt.subplots()
ax.scatter(x, y)

ax.set_title('Data to Be Fit')
ax.set_xlabel('X values')
ax.set_ylabel('Labels (Y values)')

# Render explicitly, like the other plotting cells, so the cell does not
# end on the Text(...) repr of the last label call
plt.show()

In [None]:
# Fit a linear regression on the synthetic data.
# 20% of the rows are held out for testing; the fixed random_state keeps the
# split the same on every run.
features = df[['X']]
labels = df['Y']
train_x, test_x, train_y, test_y = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training can be chained
model = LinearRegression().fit(train_x, train_y)
# Predictions for the inputs the model was not trained on
predictions = model.predict(test_x)

# score() on the held-out data (for sklearn regressors this is R^2)
print("Score:", model.score(test_x, test_y))

In [None]:
# Overlay the model's predictions (thin black line) on the held-out test points
fig, ax = plt.subplots()
ax.plot(test_x, predictions, color='black', lw=.5)
ax.scatter(test_x, test_y)

ax.set(
    title='Linear Regression Model',
    xlabel='Test X values',
    ylabel='Predicted Y values',
)

plt.show()

In [None]:
# Compare the fitted coefficient with the slope used to generate the data (-pi)
print(f"Predicted slope: {model.coef_[0]!s}")
print(f"Actual slope: {-np.pi!s}")

In [None]:
# Compare the fitted intercept with the intercept used to generate the data (e)
print(f"Predicted y-intercept: {model.intercept_!s}")
print(f"Actual y-intercept: {np.e!s}")

In [None]:
# Train / validation / test split.
# Both splits use the same options: hold out 20%, with a fixed seed.
split_opts = dict(test_size=0.2, random_state=42)

# First set aside the test set...
not_testx, test_x, not_testy, test_y = train_test_split(
    df[['X']], df['Y'], **split_opts
)
# ...then take the validation set out of what remains (20% of the remaining 80%)
train_x, valid_x, train_y, valid_y = train_test_split(
    not_testx, not_testy, **split_opts
)

Now let's try linear regression on a real dataset: the iris dataset.

In [None]:
# Load the iris dataset (`datasets` is imported with the other imports at the
# top of the notebook)
iris = datasets.load_iris()
# Sepal and petal lengths and widths, one named column per measurement.
# NOTE(review): column order is assumed to follow iris.data — confirm against
# iris.feature_names
X = pd.DataFrame(
    iris.data,
    columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'],
)
# The dataset's target array as a single-column frame
Y = pd.DataFrame(iris.target)

X.head(5)

In [None]:
# Scatter of the two length measurements: petal length on x, sepal length on y
fig, ax = plt.subplots()
ax.scatter(X['Petal Length'], X['Sepal Length'])

ax.set(
    title='Iris Dataset: Sepal Length vs. Petal Length',
    xlabel='Petal Length',
    ylabel='Sepal Length',
)

plt.show()

In [None]:
# Petal Length is the feature (x) and Sepal Length is the target (y), which is
# how the plots in this notebook label their axes. The two columns were
# previously passed the other way round.
iris_train_x, iris_test_x, iris_train_y, iris_test_y = train_test_split(
    X[['Petal Length']], X[['Sepal Length']], test_size=0.2, random_state=42
)

In [None]:
# Use a dedicated estimator for the iris data rather than refitting `model`,
# so the model fitted on the synthetic data keeps its coefficients and the
# earlier slope/intercept cells stay valid if they are re-run.
iris_model = LinearRegression()
iris_model.fit(iris_train_x, iris_train_y)
iris_predictions = iris_model.predict(iris_test_x) # predict on held-out data

print("Score:", iris_model.score(iris_test_x, iris_test_y))

In [None]:
# Overlay the iris model's predictions (thin black line) on the held-out points
fig, ax = plt.subplots()
ax.plot(iris_test_x, iris_predictions, color='black', lw=.5)
ax.scatter(iris_test_x, iris_test_y)

ax.set(
    title='Linear Regression on Sepal vs. Petal Length',
    xlabel='Petal Length',
    ylabel='Predicted Sepal Length',
)

plt.show()