## Simple Linear Regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Fitbit dataset (Fitbit2.csv) from the data-visualization lab folder
# and preview its first rows.
data = pd.read_csv(
    '../../03_data-visualization/02_lab-matplotlib-seaborn/your-code/Fitbit2.csv'
)
data.head()

### Simple Regression Plot

In [None]:
# Scatter plot of minutes awake against number of awakenings, overlaid with
# seaborn's fitted regression line.
sns.regplot(data=data, x="NumberOfAwakings", y="MinutesOfBeingAwake")


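The line drawn in the plot above corresponds to the simple linear model

[MinutesOfBeingAwake] = b0 + b1*[NumberOfAwakings]

where b0 is the intercept and b1 is the slope.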

### Estimating OLS using statsmodels

In [None]:
import statsmodels.api as sm

In [None]:
# Print the built-in documentation for the statsmodels OLS class.
help(sm.OLS)

In [None]:
# Response (endog) and predictor (exog). A constant column is attached to the
# predictor so that the fitted model includes an intercept term.
Y = data['MinutesOfBeingAwake']
X = sm.add_constant(data['NumberOfAwakings'])

# Build the OLS model, fit it, and display the regression summary table.
model = sm.OLS(endog=Y, exog=X)
results = model.fit()
results.summary()

### Estimating OLS parameters using scipy.stats

In [None]:
from scipy.stats import linregress
# Print the built-in documentation for scipy's linregress.
help(linregress)

In [None]:
# Re-select the raw columns: X was replaced by a constant-augmented frame in
# the statsmodels cell, while linregress expects plain 1-D inputs.
Y = data['MinutesOfBeingAwake']
X = data['NumberOfAwakings']

# Least-squares fit; the result unpacks into the five values named below.
slope, intercept, r_value, p_value, std_err = linregress(x=X, y=Y)
print('The slope is: ' + str(slope))
print('The intercept is: ' + str(intercept))

#### Calculating Confidence Intervals

In [None]:
from scipy import stats
help(stats.t.interval)

# 95% confidence interval for the slope from a Student-t distribution centred
# on the estimate (loc) and scaled by its standard error (scale). Two
# parameters (intercept and slope) are estimated, hence n - 2 degrees of
# freedom.
d_freedom = len(Y) - 2
stats.t.interval(0.95, d_freedom, loc=slope, scale=std_err)

#### Predictions

In [None]:
# Fitted values from the scipy estimates, computed as one vectorized pandas
# expression instead of a Python-level loop over the Series. `.tolist()` keeps
# `predictions` a plain list (of built-in floats), so it still supports
# positional indexing and prints without per-element NumPy wrappers.
predictions = (intercept + slope * X).tolist()
print(predictions)

In [None]:
# Correlation matrix between observed and fitted values. It is printed
# explicitly: a notebook only displays a cell's last expression, so as a bare
# statement the matrix was computed and then silently discarded.
print(np.corrcoef(Y, predictions))

# Observed values (x-axis) against fitted values (y-axis).
plt.scatter(Y, predictions)

#### Calculating residuals

In [None]:
# Residual = observed - fitted, paired by position. zip avoids `Y[i]`, which
# is a label lookup on a pandas Series and only coincides with position `i`
# while the index is the default 0..n-1 (it fails or mismatches once rows
# have been filtered or re-indexed).
residuals = [observed - fitted for observed, fitted in zip(Y, predictions)]

## Using sklearn

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Print the built-in documentation for sklearn's LinearRegression estimator.
help(linear_model.LinearRegression)

In [None]:
# Build the single-column (n_samples, 1) feature matrix directly from `data`
# rather than from the current value of `X`. `X` is rebound by several earlier
# cells, and this cell itself turns it into a NumPy array, so re-running
# `X.values...` raised AttributeError (arrays have no `.values`).
X = data['NumberOfAwakings'].values.reshape(-1, 1)

# Fit ordinary least squares; `fit` returns the estimator itself, so `model`
# and `lm` refer to the same fitted object.
lm = linear_model.LinearRegression()
model = lm.fit(X, Y)

# Coefficient of determination (R^2) on the training data.
lm.score(X, Y)

In [None]:
# In-sample predictions from the fitted sklearn model.
predictions = lm.predict(X)

# Estimated parameters.
print(lm.intercept_)
print(lm.coef_)

# Goodness of fit on the training data: R^2 and mean squared error.
print(r2_score(y_true=Y, y_pred=predictions))
print(mean_squared_error(y_true=Y, y_pred=predictions))


## Logistic Regression

In [None]:
# Load the churn dataset from the Tableau BI lab's data folder.
churn = pd.read_csv(
    '../../04_bi-tableau/04_lab-bi-analysis-with-tableau/data/churn.csv'
)

In [None]:
# Preview the first rows of the churn data.
churn.head()

In [None]:
# Inspect the data type pandas assigned to each column.
churn.dtypes

In [None]:
# Look at the raw target column; the next cell encodes its 'Yes' values as 1.
churn['Churn']

In [None]:
# Binary target: 1 where the customer churned ('Yes'), 0 otherwise, wrapped in
# a single-column DataFrame.
churned = (churn['Churn'] == 'Yes').values
Y = pd.DataFrame(churned.astype(int))

# Single predictor plus a constant column so the model has an intercept.
X = sm.add_constant(churn[['MonthlyCharges']])

# Fit the logistic regression; `model` holds the fitted results object.
model = sm.Logit(endog=Y, exog=X).fit()
model.summary()