In [1]:
# Common Libraries
import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Silence library warnings so they do not clutter the cell outputs.
warnings.filterwarnings("ignore")
# Print arrays in full instead of abbreviating them with "...".
# The threshold has to be numeric: the string 'nan' is rejected by current
# NumPy, and sys.maxsize is the documented value for untruncated output.
np.set_printoptions(threshold=sys.maxsize)

%matplotlib inline

# Correlation
***

In [None]:
# Seaborn scatter plot of tip against total_bill; `time` drives both the
# colour (hue) and the marker (style) of each point.
# NOTE(review): `tips` is not created in the visible cells -- assumed to be a
# DataFrame loaded beforehand.
ax = sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="time",
    style="time",
)

In [None]:
# Hypothesis test: Pearson correlation between columns X and Y of `df`.
# pearsonr hands back the coefficient and its p-value, unpacked into r and p.
print("Pearson correlation with Scipy")
r, p = sp.stats.pearsonr(
    df['X'],
    df['Y'],
)
print("correlation coefficient: ", r, "; p-value: ", p)

# One Sample T-Test
***

In [None]:
# Seaborn histogram of `x`: bars only (no KDE curve) plus a rug of tick marks.
# NOTE(review): `x` is not created in the visible cells -- assumed to be a 1-D
# sample defined beforehand. distplot is deprecated in recent seaborn releases
# (histplot / displot are its successors); confirm the installed version.
sns.distplot(
    x,
    kde=False,
    rug=True,
)

In [None]:
# Hypothesis test: one-sample t-test of the `ice_cream` column against a
# hypothesised population mean of 0.
t, p = sp.stats.ttest_1samp(
    df['ice_cream'],
    popmean=0,
)
print("t-value: ", t, "; p-value: ", p)

# Independent Samples T-Test
***

In [None]:
# Seaborn box plot: distribution of the "Continuous" column, one box per
# level of "Category".
ax = sns.boxplot(
    data=df,
    x="Category",
    y="Continuous",
)

In [None]:
# Create groups: split the rows of `df` by the label stored in Column1
Group1 = df[df['Column1'] == 'Group1']
Group2 = df[df['Column1'] == 'Group2']

# Hypothesis Testing: two-sample t-test on Y; usevar='unequal' requests the
# version that does not assume equal variances in the two groups.
# alternative also accepts 'larger' and 'smaller' as one-sided indications
# The degrees of freedom are bound to `dof` rather than `df`, so the DataFrame
# is not overwritten and this cell (and later ones) can be re-run safely.
t, p, dof = sm.stats.ttest_ind(
    Group1['Y'],
    Group2['Y'],
    alternative='two-sided',
    usevar='unequal',
)
print("t-value: ", t, "; p-value: ", p, "; df: ", dof)

# ANOVA
***

In [None]:
# Seaborn box plot used to eyeball group differences before the ANOVA:
# "Continuous" on the y-axis, grouped by "Category" on the x-axis.
boxplot_options = {"x": "Category", "y": "Continuous", "data": df}
ax = sns.boxplot(**boxplot_options)

In [None]:
# ANOVA with sum-to-zero contrasts on both factors
# NOTE(review): this snippet was labelled "One Way ANOVA", but the formula
# crosses two factors (`*` expands to both main effects plus their
# interaction). For a genuinely one-way model keep a single factor,
# e.g. 'Y ~ C(fcategory, Sum)'.
# `data` is not created in the visible cells -- assumed to be a DataFrame that
# holds the columns named in the formulas.
# The models are built through the `smf` / `sm` aliases from the import cell;
# a bare `ols` or top-level `statsmodels` name is never imported there.
moore_lm = smf.ols('Y ~ C(fcategory, Sum)*C(partner_status, Sum)', data=data).fit()
table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame
print(table)

# Two Way ANOVA: main effects of supp and dose plus their interaction
formula = 'len ~ C(supp) + C(dose) + C(supp):C(dose)'
model = smf.ols(formula, data=data).fit()
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

# OLS Regression
***

In [None]:
# Seaborn linear-model plot: Y against X from `df`, drawn with lmplot.
g = sns.lmplot(
    data=df,
    x="X",
    y="Y",
)

In [None]:
# Hypothesis test: ordinary least squares regression of Y on X.
# The unfitted specification stays in `model`; the fitted results go to
# `model_fit`, whose summary table is printed.
model = smf.ols(formula='Y ~ X', data=df)
model_fit = model.fit()
print(model_fit.summary())

# Generalized Linear Model
***

In [None]:
# Hypothesis Testing: logistic regression of admit on gre, gpa and rank
# (rank is treated as categorical via C()).
model = smf.logit(formula="admit~ gre + gpa + C(rank)", data=df).fit()
# A bare expression is only rendered when it is the last line of a cell, so
# the summary is printed explicitly.
print(model.summary())

# Odds Ratio: exponentiate the fitted coefficients and their confidence bounds
model_odds = pd.DataFrame(np.exp(model.params), columns=['OR'])
# model.pvalues holds p-values, so the column is labelled accordingly
# (it was previously mislabelled 'z-value').
model_odds['p-value'] = model.pvalues
model_odds[['2.5%', '97.5%']] = np.exp(model.conf_int())
model_odds

# Linear Mixed Model
***

In [None]:
# Hypothesis Testing: linear mixed model of Weight on Time, grouped by Pig,
# with re_formula="~Time" specifying the random-effects structure.
# NOTE(review): `data` is not created in the visible cells -- assumed to be a
# DataFrame with Weight, Time and Pig columns.
# The model must be bound to `md`, the name that is fitted on the next line.
md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"], re_formula="~Time")
mdf = md.fit()
print(mdf.summary())