# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from statistics import mean
from sklearn import linear_model

def best_fit_line(xs, ys):
    """Return the least-squares slope and y-intercept for paired samples.

    Fits the straight line ``y = slope * x + y_intercept`` that minimises the
    squared vertical distance to the points ``(xs[i], ys[i])``.

    Args:
        xs: Independent-variable samples (NumPy array or any numeric sequence).
        ys: Dependent-variable samples, same shape as ``xs``.

    Returns:
        A ``(slope, y_intercept)`` tuple of floats.

    Raises:
        ValueError: If the inputs are empty, differ in shape, or all ``xs``
            values are identical (the slope is undefined in that case).
    """
    x = np.asarray(xs, dtype=np.float64)
    y = np.asarray(ys, dtype=np.float64)
    if x.size == 0 or x.shape != y.shape:
        raise ValueError('xs and ys must be non-empty and have the same shape')

    x_mean = x.mean()
    y_mean = y.mean()

    # Centred sums are algebraically equivalent to
    # (mean(x)*mean(y) - mean(x*y)) / (mean(x)**2 - mean(x*x)), but avoid
    # subtracting two large, nearly equal quantities.
    x_dev = x - x_mean
    sum_sq_x = np.sum(x_dev * x_dev)
    if sum_sq_x == 0:
        raise ValueError('xs must contain at least two distinct values')

    slope = np.sum(x_dev * (y - y_mean)) / sum_sq_x
    y_intercept = y_mean - slope * x_mean
    return float(slope), float(y_intercept)

# Load the dataset and keep the first 200 rows whose Gender is 'Male'.
# The explicit .copy() makes male_df an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = pd.read_csv('weight-height.csv')
male_df = df[df['Gender'] == 'Male'][:200].copy()

# Unit conversion: scale Height by 2.54 and Weight by 0.45359237
# (presumably inches -> cm and pounds -> kg; confirm against the dataset).
male_df['Height'] = male_df['Height'] * 2.54
male_df['Weight'] = male_df['Weight'] * 0.45359237

# convert height and weight columns to lists
height_list = male_df['Height'].tolist()
weight_list = male_df['Weight'].tolist()

# convert lists to numpy arrays
xs = np.array(height_list, dtype=np.float64)
ys = np.array(weight_list, dtype=np.float64)


# 1st method: using our own function

# Least-squares slope and y-intercept of heights (xs) against weights (ys)
slope, y_intercept = best_fit_line(xs, ys)

# Evaluate the fitted line at every observed height. xs is a NumPy array, so
# this is a single vectorized expression and yields an array.
regression_line = (slope * xs) + y_intercept

# Making predictions: the weight the fitted line gives for this height
average_man_height = 175.26
average_man_weight = (slope * average_man_height) + y_intercept

# 2nd method: using Python's sk-learn library

# Build the linear regression estimator and train it in one step (fit()
# returns the estimator itself). The 1-D heights are turned into a
# single-column 2-D feature matrix before being passed to the model.
height_weight = linear_model.LinearRegression().fit(xs[:, np.newaxis], ys)

# Fitted weights for every observed height, i.e. the regression line
regression_line = height_weight.predict(xs[:, np.newaxis])

# Making predictions: predict() returns a one-element array for the single
# query row, so unpack that element directly.
KSI_height = 180
(KSI_weight,) = height_weight.predict(np.array([[KSI_height]]))

# Plot outputs and plot customization

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases removed the old name, so use whichever this install offers.
style.use('seaborn-v0_8' if 'seaborn-v0_8' in style.available else 'seaborn')

# Observed samples, the single model prediction, and the fitted line
plt.scatter(xs, ys, label='Data Points', alpha=0.6, color='green', s=75)
plt.scatter(KSI_height, KSI_weight, label='KSI prediction', color='red', s=100)
plt.plot(xs, regression_line, label='Best Fit Line', color='orange', linewidth=4)

plt.title('Height and Weight linear regression')
plt.xlabel('Height (cm)')
plt.ylabel('Weight (kg)')
plt.legend()
plt.show()
# ———————————————————————————————