Machine Learning
Author: Jessyca 
GitHub link: https://github.com/JessycaMcGee/datafun-07-ml


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

7.5 Chart a straight line

In [None]:
def c(f):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius."""
    return 5 / 9 * (f - 32)


# (Fahrenheit, Celsius) pairs for 0 through 100 degrees F in steps of 10.
temps = [(f, c(f)) for f in range(0, 101, 10)]

# Plot Celsius vs Fahrenheit.
# Put the (Fahrenheit, Celsius) pairs into a two-column DataFrame first.
temps_df = pd.DataFrame(data=temps, columns=['Fahrenheit', 'Celsius'])

# Line chart with a dot marker at every sampled temperature.
axes = temps_df.plot(kind='line', x='Fahrenheit', y='Celsius', style='.-')

# Label the vertical axis; the returned text object is kept in y_label.
y_label = axes.set_ylabel('Celsius')

7.6 Average High Temperatures in NYC in January

Section 1 - Data Acquisition

In [None]:
# Read the NYC January average-high temperature CSV into a DataFrame.
nyc = pd.read_csv(filepath_or_buffer='ave_hi_nyc_jan_1895-2018.csv')

Section 2 - Data Inspection

In [None]:
# View head and tail together.
# Jupyter renders only the last expression of a cell, so a bare nyc.head()
# followed by nyc.tail() would show the tail alone. Stacking the first five
# and last five rows into one frame makes both visible in a single output.
pd.concat([nyc.head(), nyc.tail()])

Section 3 - Data Cleaning

In [None]:
# Rename columns to short, descriptive labels.
nyc.columns = ['Date', 'Temperature', 'Anomaly']

# Remove 01 (January) from the date labels so only the year remains.
# Only values that still carry the two-digit month suffix (five or more
# digits) are divided; values that are already plain years are left alone,
# so re-running this cell does not truncate the years a second time.
nyc['Date'] = nyc['Date'].where(nyc['Date'] < 10_000, nyc['Date'] // 100)

# Check: kept as the single trailing expression because Jupyter renders only
# the last expression of a cell; it shows both the new column names and the
# shortened dates.
nyc.head(3)

Section 4 - Descriptive Statistics

In [None]:
# Show floating-point values with two digits after the decimal point.
pd.options.display.precision = 2

# Summary statistics for the Temperature column.
nyc['Temperature'].describe()

Section 5 - Build the Model

In [None]:
# Fit a simple linear regression of temperature on year with SciPy.
linear_regression = stats.linregress(x=nyc.Date,
                                     y=nyc.Temperature)

# Bare expressions in the middle of a cell are evaluated and then discarded
# (Jupyter renders only the last expression), so print the fitted parameters
# explicitly to make them visible.
print(f'slope: {linear_regression.slope}')
print(f'intercept: {linear_regression.intercept}')

# Temperature on the fitted line (slope * year + intercept) for 2019.
linear_regression.slope * 2019 + linear_regression.intercept

Section 6 - Predict

In [None]:
# NOTE(review): this repeats the SciPy fit from Section 5. It is kept so the
# cell still runs on its own and always works with a linregress result.
linear_regression = stats.linregress(x=nyc.Date,
                                     y=nyc.Temperature)

# Print the fitted parameters: as bare mid-cell expressions they would be
# evaluated and silently discarded, because only the last expression of a
# cell is rendered.
print(f'slope: {linear_regression.slope}')
print(f'intercept: {linear_regression.intercept}')

# Temperature on the fitted line (slope * year + intercept) for 2024.
linear_regression.slope * 2024 + linear_regression.intercept

Section 7 - Visualizations

In [None]:
# White background with grid lines for the seaborn plots.
sns.set_style('whitegrid')

# Scatter of temperature against year with a fitted regression line.
axes = sns.regplot(x=nyc['Date'], y=nyc['Temperature'])

# Fix the vertical axis range to 10-70.
axes.set_ylim(bottom=10, top=70)

7.7 Average High Temperatures in NYC in January

Section 1 - Build the Model

In [None]:
# Partition the data into training and testing sets.
# The years are reshaped into a single-column 2-D array of features; the
# temperatures stay a 1-D array of targets. random_state=11 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    nyc['Date'].values.reshape(-1, 1),
    nyc['Temperature'].values,
    random_state=11,
)

# Dimensions of the training features as (rows, columns).
X_train.shape

In [None]:
# Dimensions of the held-out test features as (rows, columns).
X_test.shape

In [None]:
# Train
# NOTE: this rebinds `linear_regression`. From here on the name refers to a
# scikit-learn LinearRegression estimator (with coef_ / intercept_), not the
# SciPy linregress result (with slope / intercept) used in section 7.6.
linear_regression = LinearRegression()

# Fit on the training split only; the test split is held back for evaluation.
linear_regression.fit(X=X_train, y=y_train)

In [None]:
# Coefficient(s) learned by the fit; X_train has a single feature column (the year).
linear_regression.coef_

In [None]:
# Intercept learned by the fit.
linear_regression.intercept_

Section 2 - Test the Model

In [None]:
# Model output for the held-out years, alongside the observed temperatures.
predicted = linear_regression.predict(X_test)
expected = y_test

# Compare every fifth prediction with its observed value.
every_fifth = slice(None, None, 5)
for p, e in zip(predicted[every_fifth], expected[every_fifth]):
    print(f'predicted: {p:.2f}, expected: {e:.2f}')

Section 3 - Predict

In [None]:
def predict(x):
    """Return the fitted line's temperature estimate for year(s) ``x``.

    Computes ``coef_ * x + intercept_`` from the trained ``linear_regression``
    model. ``x`` may be a single year or a NumPy array of years.
    """
    return linear_regression.coef_ * x + linear_regression.intercept_


# Estimate for 2024.
predict(2024)

In [None]:
# Extrapolate the fitted line back to 1890 (the data file name indicates the records start in 1895).
predict(1890)

Section 4 - Visualizations

In [None]:
# Scatter of the observed temperatures by year, colored by temperature value.
axes = sns.scatterplot(data=nyc, x='Date', y='Temperature',
                       hue='Temperature', palette='winter', legend=False)

# Fix the vertical axis range to 10-70.
axes.set_ylim(10, 70)

# Two points define the regression line: the earliest and latest year.
x = np.array([nyc.Date.min(), nyc.Date.max()])

# Temperatures on the fitted line at those two years.
y = predict(x)

# Draw on the scatterplot's own Axes rather than relying on pyplot's
# implicit "current axes" state.
line = axes.plot(x, y)

7.8 Insight