# Visual & Statistical Analysis In-class Exercise

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import ttest_ind

import warnings
warnings.filterwarnings("ignore")

The file n90pol.csv contains information on 90 university students who participated in a psychological experiment designed to look for relationships between the size of different regions of the brain and political views. The variables amygdala and acc indicate the volume of two particular brain regions known to be involved in emotions and decision-making, the amygdala and the anterior cingulate cortex; more exactly, these are residuals from the predicted volume, after adjusting for height, sex, and similar body-type variables. The variable orientation gives the students' political orientation on a five-point scale from 1 (very conservative) to 5 (very liberal).

In [26]:
# Run this before any other code cell
# This downloads the csv data files into the same directory where you have saved this notebook

import urllib.request
from pathlib import Path
import os
path = Path()

# Dictionary of file names and download links
files = {'n90pol.csv':'https://storage.googleapis.com/aipi_datasets/n90pol.csv'}

# Download each file
for key,value in files.items():
    filename = path/key
    url = value
    # If the file does not already exist in the directory, download it
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url,filename)

In [28]:
student_data = pd.read_csv('n90pol.csv')
student_data.head()


Unnamed: 0,amygdala,acc,orientation
0,0.0051,-0.0286,2
1,-0.0674,0.0007,3
2,-0.0257,-0.011,3
3,0.0504,-0.0167,2
4,0.0125,-0.0005,5


## Exercise
Use visual and statistical methods to determine if there is a difference in amygdala and/or acc size for students of different political orientations.  You should use at least 1 visual and 1 statistical method to support your conclusion.

### Visual method

In [6]:
### BEGIN SOLUTION ###
anova_model = ols('acc ~ orientation', data =  student_data).fit()
anova_table = sm.stats.anova_lm(anova_model)


### END SOLUTION ###

### Statistical method(s)

In [10]:
### BEGIN SOLUTION ###
#ANOVA acc 
#explicitly encode the orientation variable so the model knows that it is catagories 
anova_model_acc = ols('acc ~ orientation', data =  student_data).fit()
anova_table_acc = sm.stats.anova_lm(anova_model_acc)
anova_table_acc
### END SOLUTION ###

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
orientation,1.0,0.002708,0.002708,6.91441,0.010091
Residual,88.0,0.034459,0.000392,,


In [9]:
#ANOVA amygdala 
anova_model_amygdala = ols('amygdala ~ orientation', data =  student_data).fit()
anova_table_amygdala = sm.stats.anova_lm(anova_model_amygdala)
anova_table_amygdala 



Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
orientation,1.0,0.004642,0.004642,4.540365,0.035894
Residual,88.0,0.089975,0.001022,,


In [20]:
#acc t test 
view_pol = [(2,3), (2,4), (2,5), (3,4), (3,5), (4,5)]

for i,j in view_pol:
    cat1 = student_data[student_data['orientation'] == i]
    cat2 = student_data[student_data['orientation'] == j]
    print(ttest_ind(cat1['acc'], cat2['acc'], alternative = 'less'))
#shows two is less then other groups  

Ttest_indResult(statistic=-2.3644543465009393, pvalue=0.01186683732417668)
Ttest_indResult(statistic=-2.6130648099720792, pvalue=0.00585320817663907)
Ttest_indResult(statistic=-2.8650700533258613, pvalue=0.00437642751123853)
Ttest_indResult(statistic=0.07165009699684684, pvalue=0.5284463258119152)
Ttest_indResult(statistic=-0.9030842295908743, pvalue=0.18641760528874474)
Ttest_indResult(statistic=-1.0747597195106824, pvalue=0.14377045531910934)


In [25]:
#amygdala test 
view_pol = [(2,3), (2,4), (2,5), (3,4), (3,5), (4,5)]

for i,j in view_pol:
    cat1 = student_data[student_data['orientation'] == i]
    cat2 = student_data[student_data['orientation'] == j]
    print(ttest_ind(cat1['amygdala'], cat2['amygdala'], alternative = 'greater'))
#amygdala- 2,3 and 2,4 

Ttest_indResult(statistic=1.5139686740380098, pvalue=0.06950643399474973)
Ttest_indResult(statistic=2.4473213170889148, pvalue=0.008903213821213686)
Ttest_indResult(statistic=2.1343740121364743, pvalue=0.021847191261752552)
Ttest_indResult(statistic=0.6226704577680827, pvalue=0.2678742739780984)
Ttest_indResult(statistic=0.5170224258104901, pvalue=0.30424388286255305)
Ttest_indResult(statistic=0.10011813418583253, pvalue=0.4603215369827341)
