In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load legality
legality_df = pd.read_csv("../resources/State_Legality_Cannabis.csv")
legality_df.head()

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized
0,Alabama,0,0.0,1,2021
1,Alaska,1,2014.0,1,1998
2,Arizona,1,2020.0,1,2010
3,Arkansas,0,0.0,1,2016
4,California,1,2016.0,1,1996


In [3]:
# Get Data Types
legality_df.dtypes

State                  object
Recreational            int64
Rec Year Legalized    float64
Medical                 int64
Med Year Legalized      int64
dtype: object

In [4]:
# fill null values with 0
legality_df = legality_df.fillna(0)
legality_df.head()

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized
0,Alabama,0,0.0,1,2021
1,Alaska,1,2014.0,1,1998
2,Arizona,1,2020.0,1,2010
3,Arkansas,0,0.0,1,2016
4,California,1,2016.0,1,1996


In [5]:
# Convert Rec Year Legalized to int64
legality_df["Rec Year Legalized"] = legality_df["Rec Year Legalized"].astype(int)
legality_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized
0,Alabama,0,0,1,2021
1,Alaska,1,2014,1,1998
2,Arizona,1,2020,1,2010
3,Arkansas,0,0,1,2016
4,California,1,2016,1,1996
5,Colorado,1,2012,1,2000
6,Connecticut,1,2021,1,2012
7,Delaware,0,0,1,2011
8,District of Columbia,1,2015,1,2011
9,Florida,0,0,1,2016


In [6]:
legality_df.dtypes

State                 object
Recreational           int64
Rec Year Legalized     int64
Medical                int64
Med Year Legalized     int64
dtype: object

In [7]:
# recreationally legal
yrs_rec = legality_df[legality_df["Recreational"] == 1]
yrs_rec

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized
1,Alaska,1,2014,1,1998
2,Arizona,1,2020,1,2010
4,California,1,2016,1,1996
5,Colorado,1,2012,1,2000
6,Connecticut,1,2021,1,2012
8,District of Columbia,1,2015,1,2011
13,Illinois,1,2019,1,2013
19,Maine,1,2016,1,1999
21,Massachusetts,1,2016,1,2012
22,Michigan,1,2018,1,2008


In [8]:
yrs_rec['Years legal (rec)'] = 2022 - legality_df['Rec Year Legalized']
yrs_rec['Years legal (med)'] = 2022 - legality_df['Med Year Legalized']
yrs_rec

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yrs_rec['Years legal (rec)'] = 2022 - legality_df['Rec Year Legalized']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yrs_rec['Years legal (med)'] = 2022 - legality_df['Med Year Legalized']


Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
1,Alaska,1,2014,1,1998,8,24
2,Arizona,1,2020,1,2010,2,12
4,California,1,2016,1,1996,6,26
5,Colorado,1,2012,1,2000,10,22
6,Connecticut,1,2021,1,2012,1,10
8,District of Columbia,1,2015,1,2011,7,11
13,Illinois,1,2019,1,2013,3,9
19,Maine,1,2016,1,1999,6,23
21,Massachusetts,1,2016,1,2012,6,10
22,Michigan,1,2018,1,2008,4,14


In [9]:
# medically legal
yrs_med = legality_df[legality_df["Medical"] == 1]
yrs_med.head()

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized
0,Alabama,0,0,1,2021
1,Alaska,1,2014,1,1998
2,Arizona,1,2020,1,2010
3,Arkansas,0,0,1,2016
4,California,1,2016,1,1996


In [10]:
# Years medically legal
yrs_med['Years legal (med)'] = 2022 - legality_df['Med Year Legalized']
yrs_med

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yrs_med['Years legal (med)'] = 2022 - legality_df['Med Year Legalized']


Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (med)
0,Alabama,0,0,1,2021,1
1,Alaska,1,2014,1,1998,24
2,Arizona,1,2020,1,2010,12
3,Arkansas,0,0,1,2016,6
4,California,1,2016,1,1996,26
5,Colorado,1,2012,1,2000,22
6,Connecticut,1,2021,1,2012,10
7,Delaware,0,0,1,2011,11
8,District of Columbia,1,2015,1,2011,11
9,Florida,0,0,1,2016,6


In [14]:
# merge years_rec with years_med
years_df = pd.merge(yrs_rec, yrs_med, on=['Rec Year Legalized', 'State', 'Recreational', 'Medical', 'Years legal (med)', 'Med Year Legalized'], how='outer')
years_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8.0,24
1,Arizona,1,2020,1,2010,2.0,12
2,California,1,2016,1,1996,6.0,26
3,Colorado,1,2012,1,2000,10.0,22
4,Connecticut,1,2021,1,2012,1.0,10
5,District of Columbia,1,2015,1,2011,7.0,11
6,Illinois,1,2019,1,2013,3.0,9
7,Maine,1,2016,1,1999,6.0,23
8,Massachusetts,1,2016,1,2012,6.0,10
9,Michigan,1,2018,1,2008,4.0,14


In [15]:
# Replace NaN with 0
years_df = years_df.fillna(0)
years_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8.0,24
1,Arizona,1,2020,1,2010,2.0,12
2,California,1,2016,1,1996,6.0,26
3,Colorado,1,2012,1,2000,10.0,22
4,Connecticut,1,2021,1,2012,1.0,10
5,District of Columbia,1,2015,1,2011,7.0,11
6,Illinois,1,2019,1,2013,3.0,9
7,Maine,1,2016,1,1999,6.0,23
8,Massachusetts,1,2016,1,2012,6.0,10
9,Michigan,1,2018,1,2008,4.0,14


In [16]:
# Data type check
years_df.dtypes

State                  object
Recreational            int64
Rec Year Legalized      int64
Medical                 int64
Med Year Legalized      int64
Years legal (rec)     float64
Years legal (med)       int64
dtype: object

In [17]:
# Convert to int
years_df['Years legal (rec)'] = years_df['Years legal (rec)'].astype(int)

In [18]:
years_df.dtypes

State                 object
Recreational           int64
Rec Year Legalized     int64
Medical                int64
Med Year Legalized     int64
Years legal (rec)      int64
Years legal (med)      int64
dtype: object

In [19]:
years_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8,24
1,Arizona,1,2020,1,2010,2,12
2,California,1,2016,1,1996,6,26
3,Colorado,1,2012,1,2000,10,22
4,Connecticut,1,2021,1,2012,1,10
5,District of Columbia,1,2015,1,2011,7,11
6,Illinois,1,2019,1,2013,3,9
7,Maine,1,2016,1,1999,6,23
8,Massachusetts,1,2016,1,2012,6,10
9,Michigan,1,2018,1,2008,4,14


In [25]:
# merge with all states
legality_merged_df = pd.merge(years_df, legality_df, on=['Rec Year Legalized', 'State', 'Recreational', 'Medical', 'Med Year Legalized'], how='outer')
legality_merged_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8.0,24.0
1,Arizona,1,2020,1,2010,2.0,12.0
2,California,1,2016,1,1996,6.0,26.0
3,Colorado,1,2012,1,2000,10.0,22.0
4,Connecticut,1,2021,1,2012,1.0,10.0
5,District of Columbia,1,2015,1,2011,7.0,11.0
6,Illinois,1,2019,1,2013,3.0,9.0
7,Maine,1,2016,1,1999,6.0,23.0
8,Massachusetts,1,2016,1,2012,6.0,10.0
9,Michigan,1,2018,1,2008,4.0,14.0


In [26]:
# Replace NaN with 0
legality_merged_df = legality_merged_df.fillna(0)
legality_merged_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8.0,24.0
1,Arizona,1,2020,1,2010,2.0,12.0
2,California,1,2016,1,1996,6.0,26.0
3,Colorado,1,2012,1,2000,10.0,22.0
4,Connecticut,1,2021,1,2012,1.0,10.0
5,District of Columbia,1,2015,1,2011,7.0,11.0
6,Illinois,1,2019,1,2013,3.0,9.0
7,Maine,1,2016,1,1999,6.0,23.0
8,Massachusetts,1,2016,1,2012,6.0,10.0
9,Michigan,1,2018,1,2008,4.0,14.0


In [29]:
legality_merged_df['Years legal (rec)'] = legality_merged_df['Years legal (rec)'].astype(int)
legality_merged_df['Years legal (med)'] = legality_merged_df['Years legal (med)'].astype(int)
legality_merged_df

Unnamed: 0,State,Recreational,Rec Year Legalized,Medical,Med Year Legalized,Years legal (rec),Years legal (med)
0,Alaska,1,2014,1,1998,8,24
1,Arizona,1,2020,1,2010,2,12
2,California,1,2016,1,1996,6,26
3,Colorado,1,2012,1,2000,10,22
4,Connecticut,1,2021,1,2012,1,10
5,District of Columbia,1,2015,1,2011,7,11
6,Illinois,1,2019,1,2013,3,9
7,Maine,1,2016,1,1999,6,23
8,Massachusetts,1,2016,1,2012,6,10
9,Michigan,1,2018,1,2008,4,14


In [31]:
legality_merged_df.to_csv('../datasets/legality_merged.csv')