# EDS Case Study

Exploring changes in outlook

Allen Downey

[MIT License](https://en.wikipedia.org/wiki/MIT_License)

In [None]:
# If we're running in Colab, set up the environment

import sys
# 'google.colab' is in sys.modules only if that package has been imported
# in this process; that is used here as the test for running on Colab.
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Install empiricaldist, clone the course repository (latest commit
    # only) and make it the working directory -- presumably so that
    # utils.py and the data file are found; TODO confirm.
    !pip install empiricaldist
    !git clone --depth 1 https://github.com/AllenDowney/ExploratoryDataAnalysis
    %cd ExploratoryDataAnalysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from utils import decorate
from utils import plot_series_lowess, plot_columns_lowess

ImportError: cannot import name 'plot_series_lowess' from 'utils' (/home/downey/ExploratoryDataAnalysis/utils.py)

### Loading the GSS data

In [None]:
# Read the object stored under key 'gss0' in the HDF5 file and show its shape.
gss = pd.read_hdf('eds.gss.hdf5', 'gss0')
gss.shape

In [None]:
def replace_invalid(df):
    """Recode the three-way response columns of `df` onto a 0 to 1 scale.

    The columns are overwritten on `df` itself; nothing is returned.
    Values other than 1, 2 and 3 are left unchanged.

    df: DataFrame with columns trust, helpful, racpres, owngun, fepol,
        sexeduc and fair
    """
    # recode so depends is in the middle
    d = {1: 1, 2: 0, 3: 0.5}
    for varname in ['trust', 'helpful', 'racpres',
                    'owngun', 'fepol', 'sexeduc']:
        # Assign the result back to the column. Calling
        # replace(inplace=True) on a column pulled out of the DataFrame is
        # chained assignment: it warns in pandas 2.x and leaves `df`
        # untouched when Copy-on-Write is enabled.
        df[varname] = df[varname].replace(d)

    # fair is coded the other way around: 1 maps to 0 and 2 maps to 1
    d = {1: 0, 2: 1, 3: 0.5}
    df['fair'] = df['fair'].replace(d)

In [None]:
# Take five colors from seaborn's 'muted' palette and display them.
muted = sns.color_palette('muted', 5)
sns.palplot(muted)

In [None]:
# Color assigned to each political group, picked from the `muted` palette.
colors = {
    'Conservative': muted[3],
    'Moderate': muted[4],
    'Liberal': muted[0],
}

### 3-point scale

To make it easier to visualize groups, I'm going to lump the 7-point scale into a 3-point scale.

With this scale, there are roughly the same number of people in each group.

In [None]:
def make_polviews3(df):
    """Add a `polviews3` column that collapses `polviews` to three groups.

    Codes 1-3 become 'Liberal', 4 becomes 'Moderate' and 5-7 become
    'Conservative'. The `polviews` column itself is not modified.

    df: DataFrame with a `polviews` column
    """
    recode = dict.fromkeys([1, 2, 3], 'Liberal')
    recode[4] = 'Moderate'
    recode.update(dict.fromkeys([5, 6, 7], 'Conservative'))

    df['polviews3'] = df['polviews'].replace(recode)
    
# Add the polviews3 column to gss.
make_polviews3(gss)

In [None]:
def values(series):
    """Count how often each value occurs, ordered by value.

    series: Series

    returns: Series that maps each distinct value to its count
    """
    counts = series.value_counts()
    return counts.sort_index()

# Number of rows in each polviews3 group.
values(gss['polviews3'])

## Fair

Response to [this question](https://gssdataexplorer.norc.org/projects/52787/variables/440/vshow):

> Do you think most people would try to take advantage of you if they got a chance, or would they try to be fair?

```
1	Take advantage
2	Fair
3	Depends
```


In [None]:
# Counts of each response code in the `fair` column.
values(gss['fair'])

In [None]:
# Recode so that "fair" (2) becomes 1, "take advantage" (1) becomes 0 and
# "depends" (3) sits in the middle at 0.5.
# NOTE: run this cell only once; a second run would map the recoded 1
# back to 0.
d = {1: 0, 2: 1, 3: 0.5}
# Assign the result back to the column: replace(inplace=True) on
# gss['fair'] is chained assignment, which warns in pandas 2.x and does
# not modify gss when Copy-on-Write is enabled.
gss['fair'] = gss['fair'].replace(d)
values(gss['fair'])

In [None]:
def group_by_year(df, varname):
    """Compute the mean of `varname` within each year.

    df: DataFrame with a `year` column
    varname: string variable name

    returns: Series of means indexed by year, with NaN means removed
    """
    yearly_means = df.groupby('year')[varname].mean()
    return yearly_means.dropna()

In [None]:
def decorate_by_year(**options):
    """Label the axes.

    Supplies the x label, y label and x limits used for the by-year plots.
    Any of them can be overridden by passing the same keyword in `options`.

    options: keyword arguments passed to `decorate`.
    """
    settings = dict(xlabel='Year',
                    ylabel='Fraction saying yes',
                    xlim=[1970, 2020])
    # Merge rather than pass both sets of keywords to `decorate`: a caller
    # that supplies e.g. `ylabel` then overrides the default instead of
    # triggering a duplicate-keyword TypeError.
    settings.update(options)
    decorate(**settings)

In [None]:
# Mean of the recoded `fair` responses in each year, passed to
# plot_series_lowess ('C1' is presumably the line color -- defined in utils).
mean_by_year = group_by_year(gss, 'fair')
plot_series_lowess(mean_by_year, 'C1')

# `title` is a notebook-level name; the by-group plot for `fair` reuses it.
title='Would most people try to be fair?'
decorate_by_year(title=title)

In [None]:
def group_by_polviews(df, varname):
    """Compute the mean of `varname` for each political group and year.

    df: DataFrame with `polviews3` and `year` columns
    varname: string variable name

    returns: DataFrame with one row per year and one column per
             `polviews3` group
    """
    group_year_means = df.groupby(['polviews3', 'year'])[varname].mean()
    valid_means = group_year_means.dropna()
    # Move the first index level (polviews3) into the columns.
    return valid_means.unstack(level=0)

In [None]:
# Mean of `fair` for each polviews3 group and year; show the first rows.
mean_by_polviews = group_by_polviews(gss, 'fair')
mean_by_polviews.head()

In [None]:
# Pass the three group columns and their colors to plot_columns_lowess,
# then label the axes with the title set in the by-year cell for `fair`.
columns = ['Conservative', 'Liberal', 'Moderate']
plot_columns_lowess(mean_by_polviews, columns, colors)
decorate_by_year(title=title)

## Trust

Response to [this question](https://gssdataexplorer.norc.org/projects/52787/variables/441/vshow):

> Generally speaking, would you say that most people can be trusted or that you can't be too careful in dealing with people?

```
1	Can trust
2	Cannot trust
3	Depends
```

In [None]:
# Counts of each response code in the `trust` column.
values(gss['trust'])

In [None]:
# Recode so that "can trust" (1) stays 1, "cannot trust" (2) becomes 0 and
# "depends" (3) sits in the middle at 0.5.
d = {1: 1, 2: 0, 3: 0.5}
# Assign the result back to the column: replace(inplace=True) on
# gss['trust'] is chained assignment, which warns in pandas 2.x and does
# not modify gss when Copy-on-Write is enabled.
gss['trust'] = gss['trust'].replace(d)
values(gss['trust'])

In [None]:
# Mean of the recoded `trust` responses in each year, passed to
# plot_series_lowess ('C1' is presumably the line color -- defined in utils).
mean_by_year = group_by_year(gss, 'trust')
plot_series_lowess(mean_by_year, 'C1')

# `title` is a notebook-level name; the by-group plot for `trust` reuses it.
title='Can most people be trusted?'
decorate_by_year(title=title)

In [None]:
# Mean of `trust` for each polviews3 group and year; show the first rows.
mean_by_polviews = group_by_polviews(gss, 'trust')
mean_by_polviews.head()

In [None]:
# Pass the three group columns and their colors to plot_columns_lowess,
# then label the axes with the title set in the by-year cell for `trust`.
columns = ['Conservative', 'Liberal', 'Moderate']
plot_columns_lowess(mean_by_polviews, columns, colors)
decorate_by_year(title=title)

## Helpful

Response to [this question](https://gssdataexplorer.norc.org/projects/52787/variables/439/vshow):

> Would you say that most of the time people try to be helpful, or that they are mostly just looking out for themselves?

```
1	Helpful
2	Lookout for self
3	Depends
```

In [None]:
# Counts of each response code in the `helpful` column.
values(gss['helpful'])

In [None]:
# Recode so that "helpful" (1) stays 1, "look out for self" (2) becomes 0
# and "depends" (3) sits in the middle at 0.5.
d = {1: 1, 2: 0, 3: 0.5}
# Assign the result back to the column: replace(inplace=True) on
# gss['helpful'] is chained assignment, which warns in pandas 2.x and does
# not modify gss when Copy-on-Write is enabled.
gss['helpful'] = gss['helpful'].replace(d)
values(gss['helpful'])

In [None]:
# Mean of the recoded `helpful` responses in each year, passed to
# plot_series_lowess ('C1' is presumably the line color -- defined in utils).
mean_by_year = group_by_year(gss, 'helpful')
plot_series_lowess(mean_by_year, 'C1')

# `title` is a notebook-level name; the by-group plot for `helpful` reuses it.
title='Do most people try to be helpful?'
decorate_by_year(title=title)

In [None]:
# Mean of `helpful` for each polviews3 group and year; show the first rows.
mean_by_polviews = group_by_polviews(gss, 'helpful')
mean_by_polviews.head()

In [None]:
# Pass the three group columns and their colors to plot_columns_lowess,
# then label the axes with the title set in the by-year cell for `helpful`.
columns = ['Conservative', 'Liberal', 'Moderate']
plot_columns_lowess(mean_by_polviews, columns, colors)
decorate_by_year(title=title)