In [1]:
# Import our dependencies
import pandas as pd
import numpy as np

In [2]:
# Load the age-wise social media usage CSV into a DataFrame and preview it.
usage_csv_path = "../Data/socialmedia_usage_agewise.csv"
df = pd.read_csv(usage_csv_path)
df.head()

Unnamed: 0,usagedate,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per
0,2005-03-21,0.07,0.06,0.04,0.03
1,2005-12-08,0.16,0.09,0.05,0.02
2,2006-08-31,0.41,0.06,0.03,0.0
3,2008-05-11,0.6,0.21,0.07,0.02
4,2008-08-10,0.65,0.27,0.1,0.03


In [3]:
# Print column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   usagedate      30 non-null     object 
 1   age_18_29_per  30 non-null     float64
 2   age_30_49_per  30 non-null     float64
 3   age_50_64_per  30 non-null     float64
 4   over_65_per    30 non-null     float64
dtypes: float64(4), object(1)
memory usage: 1.3+ KB


In [4]:
# Parse the 'usagedate' strings into pandas datetimes (column keeps its position).
df = df.assign(usagedate=pd.to_datetime(df['usagedate']))

In [5]:
# Derive the calendar year of each usage date and store it in a new 'year' column.
usage_dates = pd.to_datetime(df['usagedate'])
df['year'] = usage_dates.dt.year
df.head()

Unnamed: 0,usagedate,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year
0,2005-03-21,0.07,0.06,0.04,0.03,2005
1,2005-12-08,0.16,0.09,0.05,0.02,2005
2,2006-08-31,0.41,0.06,0.03,0.0,2006
3,2008-05-11,0.6,0.21,0.07,0.02,2008
4,2008-08-10,0.65,0.27,0.1,0.03,2008


In [6]:
# The full date is no longer needed once 'year' exists; keep every other column.
df = df.drop(columns=['usagedate'])
df.head()

Unnamed: 0,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year
0,0.07,0.06,0.04,0.03,2005
1,0.16,0.09,0.05,0.02,2005
2,0.41,0.06,0.03,0.0,2006
3,0.6,0.21,0.07,0.02,2008
4,0.65,0.27,0.1,0.03,2008


In [7]:
# Collapse rows that share a year into one row holding the mean of each column,
# then move 'year' back from the index into an ordinary column.
yearly_mean = df.groupby('year').mean()
df = yearly_mean.reset_index()
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per
0,2005,0.115,0.075,0.045,0.025
1,2006,0.41,0.06,0.03,0.0
2,2008,0.638,0.266,0.092,0.024
3,2009,0.716667,0.443333,0.22,0.063333
4,2010,0.776667,0.531667,0.326667,0.106667
5,2011,0.805,0.6,0.37,0.13
6,2012,0.845,0.66,0.435,0.19
7,2013,0.876667,0.726667,0.51,0.24
8,2014,0.84,0.77,0.52,0.27
9,2015,0.9,0.77,0.51,0.35


In [8]:
# Approximate a birth year for each age bracket by subtracting one
# representative age from the survey year (original note: 24 is the median
# age of the 18-29 bracket).
# NOTE(review): 65 looks like the lower bound of the over-65 bracket rather
# than a midpoint - confirm this is the intended representative age.
for bracket_age in (24, 40, 57, 65):
    df[f'year_born_{bracket_age}'] = df['year'] - bracket_age
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year_born_24,year_born_40,year_born_57,year_born_65
0,2005,0.115,0.075,0.045,0.025,1981,1965,1948,1940
1,2006,0.41,0.06,0.03,0.0,1982,1966,1949,1941
2,2008,0.638,0.266,0.092,0.024,1984,1968,1951,1943
3,2009,0.716667,0.443333,0.22,0.063333,1985,1969,1952,1944
4,2010,0.776667,0.531667,0.326667,0.106667,1986,1970,1953,1945
5,2011,0.805,0.6,0.37,0.13,1987,1971,1954,1946
6,2012,0.845,0.66,0.435,0.19,1988,1972,1955,1947
7,2013,0.876667,0.726667,0.51,0.24,1989,1973,1956,1948
8,2014,0.84,0.77,0.52,0.27,1990,1974,1957,1949
9,2015,0.9,0.77,0.51,0.35,1991,1975,1958,1950


In [9]:
# Label the generation of the 'year_born_24' birth years.
# A single pd.cut replaces six masked .loc writes; it also avoids creating the
# column through a mask that matches no rows and then writing strings into it.
# Bins are right-closed: (1900, 1927] covers 1901-1927, ..., (1996, 2012]
# covers 1997-2012, which matches the original ranges for integer birth years.
generation_edges = [1900, 1927, 1945, 1964, 1980, 1996, 2012]
generation_names = ['G.I. Generation', 'Silent', 'Boomers',
                    'Generation X', 'Millennials', 'Generation Z']
df['generation_24'] = pd.cut(
    df['year_born_24'], bins=generation_edges, labels=generation_names
).astype(object)  # plain strings; birth years outside 1901-2012 stay NaN
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year_born_24,year_born_40,year_born_57,year_born_65,generation_24
0,2005,0.115,0.075,0.045,0.025,1981,1965,1948,1940,Millennials
1,2006,0.41,0.06,0.03,0.0,1982,1966,1949,1941,Millennials
2,2008,0.638,0.266,0.092,0.024,1984,1968,1951,1943,Millennials
3,2009,0.716667,0.443333,0.22,0.063333,1985,1969,1952,1944,Millennials
4,2010,0.776667,0.531667,0.326667,0.106667,1986,1970,1953,1945,Millennials
5,2011,0.805,0.6,0.37,0.13,1987,1971,1954,1946,Millennials
6,2012,0.845,0.66,0.435,0.19,1988,1972,1955,1947,Millennials
7,2013,0.876667,0.726667,0.51,0.24,1989,1973,1956,1948,Millennials
8,2014,0.84,0.77,0.52,0.27,1990,1974,1957,1949,Millennials
9,2015,0.9,0.77,0.51,0.35,1991,1975,1958,1950,Millennials


In [10]:
# Label the generation of the 'year_born_40' birth years.
# A single pd.cut replaces six masked .loc writes; it also avoids creating the
# column through a mask that matches no rows and then writing strings into it.
# Bins are right-closed: (1900, 1927] covers 1901-1927, ..., (1996, 2012]
# covers 1997-2012, which matches the original ranges for integer birth years.
generation_edges = [1900, 1927, 1945, 1964, 1980, 1996, 2012]
generation_names = ['G.I. Generation', 'Silent', 'Boomers',
                    'Generation X', 'Millennials', 'Generation Z']
df['generation_40'] = pd.cut(
    df['year_born_40'], bins=generation_edges, labels=generation_names
).astype(object)  # plain strings; birth years outside 1901-2012 stay NaN
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year_born_24,year_born_40,year_born_57,year_born_65,generation_24,generation_40
0,2005,0.115,0.075,0.045,0.025,1981,1965,1948,1940,Millennials,Generation X
1,2006,0.41,0.06,0.03,0.0,1982,1966,1949,1941,Millennials,Generation X
2,2008,0.638,0.266,0.092,0.024,1984,1968,1951,1943,Millennials,Generation X
3,2009,0.716667,0.443333,0.22,0.063333,1985,1969,1952,1944,Millennials,Generation X
4,2010,0.776667,0.531667,0.326667,0.106667,1986,1970,1953,1945,Millennials,Generation X
5,2011,0.805,0.6,0.37,0.13,1987,1971,1954,1946,Millennials,Generation X
6,2012,0.845,0.66,0.435,0.19,1988,1972,1955,1947,Millennials,Generation X
7,2013,0.876667,0.726667,0.51,0.24,1989,1973,1956,1948,Millennials,Generation X
8,2014,0.84,0.77,0.52,0.27,1990,1974,1957,1949,Millennials,Generation X
9,2015,0.9,0.77,0.51,0.35,1991,1975,1958,1950,Millennials,Generation X


In [11]:
# Label the generation of the 'year_born_57' birth years.
# A single pd.cut replaces six masked .loc writes; it also avoids creating the
# column through a mask that matches no rows and then writing strings into it.
# Bins are right-closed: (1900, 1927] covers 1901-1927, ..., (1996, 2012]
# covers 1997-2012, which matches the original ranges for integer birth years.
generation_edges = [1900, 1927, 1945, 1964, 1980, 1996, 2012]
generation_names = ['G.I. Generation', 'Silent', 'Boomers',
                    'Generation X', 'Millennials', 'Generation Z']
df['generation_57'] = pd.cut(
    df['year_born_57'], bins=generation_edges, labels=generation_names
).astype(object)  # plain strings; birth years outside 1901-2012 stay NaN
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year_born_24,year_born_40,year_born_57,year_born_65,generation_24,generation_40,generation_57
0,2005,0.115,0.075,0.045,0.025,1981,1965,1948,1940,Millennials,Generation X,Boomers
1,2006,0.41,0.06,0.03,0.0,1982,1966,1949,1941,Millennials,Generation X,Boomers
2,2008,0.638,0.266,0.092,0.024,1984,1968,1951,1943,Millennials,Generation X,Boomers
3,2009,0.716667,0.443333,0.22,0.063333,1985,1969,1952,1944,Millennials,Generation X,Boomers
4,2010,0.776667,0.531667,0.326667,0.106667,1986,1970,1953,1945,Millennials,Generation X,Boomers
5,2011,0.805,0.6,0.37,0.13,1987,1971,1954,1946,Millennials,Generation X,Boomers
6,2012,0.845,0.66,0.435,0.19,1988,1972,1955,1947,Millennials,Generation X,Boomers
7,2013,0.876667,0.726667,0.51,0.24,1989,1973,1956,1948,Millennials,Generation X,Boomers
8,2014,0.84,0.77,0.52,0.27,1990,1974,1957,1949,Millennials,Generation X,Boomers
9,2015,0.9,0.77,0.51,0.35,1991,1975,1958,1950,Millennials,Generation X,Boomers


In [12]:
# Label the generation of the 'year_born_65' birth years.
# A single pd.cut replaces six masked .loc writes; it also avoids creating the
# column through a mask that matches no rows and then writing strings into it.
# Bins are right-closed: (1900, 1927] covers 1901-1927, ..., (1996, 2012]
# covers 1997-2012, which matches the original ranges for integer birth years.
generation_edges = [1900, 1927, 1945, 1964, 1980, 1996, 2012]
generation_names = ['G.I. Generation', 'Silent', 'Boomers',
                    'Generation X', 'Millennials', 'Generation Z']
df['generation_65'] = pd.cut(
    df['year_born_65'], bins=generation_edges, labels=generation_names
).astype(object)  # plain strings; birth years outside 1901-2012 stay NaN
df

Unnamed: 0,year,age_18_29_per,age_30_49_per,age_50_64_per,over_65_per,year_born_24,year_born_40,year_born_57,year_born_65,generation_24,generation_40,generation_57,generation_65
0,2005,0.115,0.075,0.045,0.025,1981,1965,1948,1940,Millennials,Generation X,Boomers,Silent
1,2006,0.41,0.06,0.03,0.0,1982,1966,1949,1941,Millennials,Generation X,Boomers,Silent
2,2008,0.638,0.266,0.092,0.024,1984,1968,1951,1943,Millennials,Generation X,Boomers,Silent
3,2009,0.716667,0.443333,0.22,0.063333,1985,1969,1952,1944,Millennials,Generation X,Boomers,Silent
4,2010,0.776667,0.531667,0.326667,0.106667,1986,1970,1953,1945,Millennials,Generation X,Boomers,Silent
5,2011,0.805,0.6,0.37,0.13,1987,1971,1954,1946,Millennials,Generation X,Boomers,Boomers
6,2012,0.845,0.66,0.435,0.19,1988,1972,1955,1947,Millennials,Generation X,Boomers,Boomers
7,2013,0.876667,0.726667,0.51,0.24,1989,1973,1956,1948,Millennials,Generation X,Boomers,Boomers
8,2014,0.84,0.77,0.52,0.27,1990,1974,1957,1949,Millennials,Generation X,Boomers,Boomers
9,2015,0.9,0.77,0.51,0.35,1991,1975,1958,1950,Millennials,Generation X,Boomers,Boomers
