#### Michael Perrie
#### DSC 630 Predictive Analytics
#### Milestone III
#### Professor Farley
<h1><center>Los Angeles Crime Data Analysis</center></h1>

In [2]:
# Import Libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestRegressor, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import TimeSeriesSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

import warnings

In [3]:
# Notebook-wide settings: never truncate DataFrame columns when displaying,
# and suppress all Python warnings for the rest of the session.
# NOTE(review): the blanket 'ignore' filter also hides library diagnostics
# (e.g. pandas chained-assignment warnings) — consider narrowing it.
pd.options.display.max_columns = None
warnings.simplefilter('ignore')

<h1><center>Import and transform data</center></h1>

In [4]:
# Read the crime CSV into a DataFrame and preview the first rows to confirm the load.
# NOTE(review): this is an absolute, machine-specific path — the notebook will
# not run on another machine without editing it.
crime_csv_path = '/Users/michaelperrine/Library/CloudStorage/OneDrive-Personal/Documents/Predictive Analytics/LA-Crime-Data/crime_data copy.csv'
la_crime = pd.read_csv(crime_csv_path)
la_crime.head()

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,Mocodes,Vict Age,Vict Sex,Vict Descent,Premis Cd,Premis Desc,Weapon Used Cd,Weapon Desc,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,201804032,1/1/2020 0:00,1/1/2020 0:00,1250,18,Southeast,1823,1,210,ROBBERY,1822 0344 0400 0342,50,M,B,102.0,SIDEWALK,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,210.0,,,,CENTURY,AVALON BL,33.9456,-118.2652
1,201804035,1/1/2020 0:00,1/1/2020 0:00,1430,18,Southeast,1802,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),0329 1810,53,F,B,501.0,SINGLE FAMILY DWELLING,,,IC,Invest Cont,745.0,,,,8700 S FIGUEROA ST,,33.9587,-118.2827
2,201904054,1/1/2020 0:00,1/1/2020 0:00,1300,19,Mission,1998,1,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),0325,39,F,H,405.0,CLOTHING STORE,,,AA,Adult Arrest,442.0,,,,7800 VAN NUYS BL,,34.2153,-118.4409
3,201804023,1/1/2020 0:00,1/1/2020 0:00,140,18,Southeast,1802,2,626,INTIMATE PARTNER - SIMPLE ASSAULT,2000 1813 0400 2033,34,M,H,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,626.0,,,,300 W 91ST PL,,33.9536,-118.2782
4,201704023,1/1/2020 0:00,1/1/2020 0:00,20,17,Devonshire,1785,2,624,BATTERY - SIMPLE ASSAULT,0416,17,M,H,103.0,ALLEY,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",AO,Adult Other,624.0,,,,18700 NORDHOFF ST,,34.2355,-118.5404


In [4]:
# Check data types: list the dtype pandas inferred for each column of the loaded frame.
la_crime.dtypes

DR_NO               int64
Date Rptd          object
DATE OCC           object
TIME OCC            int64
AREA                int64
AREA NAME          object
Rpt Dist No         int64
Part 1-2            int64
Crm Cd              int64
Crm Cd Desc        object
Mocodes            object
Vict Age            int64
Vict Sex           object
Vict Descent       object
Premis Cd         float64
Premis Desc        object
Weapon Used Cd    float64
Weapon Desc        object
Status             object
Status Desc        object
Crm Cd 1          float64
Crm Cd 2          float64
Crm Cd 3          float64
Crm Cd 4          float64
LOCATION           object
Cross Street       object
LAT               float64
LON               float64
dtype: object

In [5]:
# Count null values: number of missing entries in each column.
la_crime.isnull().sum()

DR_NO                   0
Date Rptd               0
DATE OCC                0
TIME OCC                0
AREA                    0
AREA NAME               0
Rpt Dist No             0
Part 1-2                0
Crm Cd                  0
Crm Cd Desc             0
Mocodes            151691
Vict Age                0
Vict Sex           144717
Vict Descent       144729
Premis Cd              16
Premis Desc           588
Weapon Used Cd     677816
Weapon Desc        677816
Status                  1
Status Desc             0
Crm Cd 1               11
Crm Cd 2           935904
Crm Cd 3          1002748
Crm Cd 4          1004998
LOCATION                0
Cross Street       850825
LAT                     0
LON                     0
dtype: int64

In [None]:
# Drop the columns this analysis does not use (redundant code/description
# fields and columns with many missing values).
# The frame is reassigned rather than mutated with `inplace=True`, and the
# labels are passed through `columns=` so the axis is explicit.
la_crime = la_crime.drop(columns=[
    'Date Rptd',
    'Mocodes',
    'Vict Sex',
    'Vict Descent',
    'Premis Cd',
    'Premis Desc',
    'Weapon Used Cd',
    'Weapon Desc',
    'Crm Cd 1',
    'Crm Cd 2',
    'Crm Cd 3',
    'Crm Cd 4',
    'Cross Street',
    'LOCATION',
    'AREA NAME',
    'Status',
    'Crm Cd Desc',
])

In [None]:
# Display column names.
la_crime.columns

In [None]:
# Rename columns.
la_crime = la_crime.rename(columns={'DR_NO':'dr_no',
                'DATE OCC':'date',
                'TIME OCC': 'time',
                'AREA':'area',
                'Rpt Dist No':'rpt_dist_no',
                'Part 1-2':'part_1_2',
                'Crm Cd':'crm_cd',
                'Vict Age':'vict_age',
                'Status Desc':'status_desc',
                'LAT':'lat',
                'LON':'lon'})

In [None]:
# View data types.
la_crime.dtypes

In [None]:
# Display cleaned dataset.
la_crime.head()

In [None]:
# Convert date column to datetime format.
la_crime['date'] = pd.to_datetime(la_crime['date'])



<h1><center>Filter Data by Year</center></h1>

In [None]:
# First group 2020.
la_crime_2020 = la_crime[(la_crime['date']>='2020/01/01') & (la_crime['date']<='2020/12/31')]
la_crime_2020['vict_age']= la_crime_2020['vict_age'].abs()
la_crime_2020.head()


In [None]:
# Dimension of 2020 data.
la_crime_2020.shape

In [None]:
# Second group 2021.
la_crime_2021 = la_crime[(la_crime['date']>='2021-/1/01') & (la_crime['date']<='2021/12/31')]
la_crime_2021['vict_age']= la_crime_2021['vict_age'].abs()
la_crime_2021.head()


In [None]:
# Dimension of 2021 data.
la_crime_2021.shape

In [None]:
# Third group 2022.
la_crime_2022 = la_crime[(la_crime['date']>='2022/01/01') & (la_crime['date']<='2022/12/31')]
la_crime_2022['vict_age']= la_crime_2022['vict_age'].abs()
la_crime_2022.head()


In [None]:
# Dimension of 2022 data.
la_crime_2022.shape

In [None]:
# Fourth group 2023.
la_crime_2023 = la_crime[(la_crime['date']>='2023/01/01') & (la_crime['date']<='2023/12/31')]
la_crime_2023['vict_age']= la_crime_2023['vict_age'].abs()
la_crime_2023.head()

In [None]:
# Dimension of 2023 data.
la_crime_2023.shape

In [None]:
# Fifth group 2024.
la_crime_2024 = la_crime[(la_crime['date']>='2024/01/01') & (la_crime['date']<='2024/12/31')]
la_crime_2024['vict_age']= la_crime_2024['vict_age'].abs()
la_crime_2024.head()

In [None]:
# Dimension of 2024 data.
la_crime_2024.shape

<h1><center>Create visualizations</center></h1>

In [None]:
# Matplotlib defaults applied to the figures created after this cell.
plt.rcParams.update({
    'figure.figsize': (10, 10),  # default figure size
    'font.size': 15,             # default font size
    'lines.linewidth': 2,        # default line width
    'axes.grid': True,           # enable the grid by default
})

In [None]:
# Histogram of crime codes over the whole dataset (50 bins).
# NOTE(review): sns.set_style runs after the axes has been drawn, so it
# presumably only affects figures created later — confirm this is intended.
ax = sns.histplot(la_crime, x='crm_cd', bins=50)
ax.set_title('Total Crime Categories in LA County', fontsize=20, weight='bold')
ax.set_xlabel('Crime Codes', weight='bold')
ax.set_ylabel('Totals', weight='bold')
sns.set_style("white")

plt.show()

In [None]:
# Histogram of case status descriptions over the whole dataset,
# with the x tick labels rotated 45 degrees.
ax = sns.histplot(la_crime, x='status_desc', bins=20)
ax.set_title('Total Status Descriptions in LA County', fontsize=20, weight='bold')
ax.set_xlabel('Status Descriptions', weight='bold')
ax.set_ylabel('Totals', weight='bold')
plt.xticks(rotation=45)
sns.set_style("white")

plt.show()

In [None]:
# Histogram of victim age over the whole dataset (20 bins).
ax = sns.histplot(la_crime, x='vict_age', bins=20)
ax.set_title('Victim Age Distribution', fontsize=20, weight='bold')
ax.set_xlabel('Victim Age', weight='bold')
ax.set_ylabel('Crime Totals', weight='bold')
sns.set_style("white")
plt.show()

In [None]:
# Histogram of victim age for the 2020 subset (20 bins).
ax = sns.histplot(la_crime_2020, x='vict_age', bins=20)
ax.set_title('Victim Age Distribution in 2020', fontsize=20, weight='bold')
ax.set_xlabel('Victim Age', weight='bold')
ax.set_ylabel('Crime Totals', weight='bold')
sns.set_style("white")
plt.show()

In [None]:
# Scatter of victim age (y) against crime code (x) for the 2020 subset.
ax = sns.scatterplot(x='crm_cd', y='vict_age', data=la_crime_2020)
ax.set_title('Victim Age VS Crime Codes', fontsize=20, weight='bold')
ax.set_xlabel('Crime Code', weight='bold')
ax.set_ylabel('Victim Age', weight='bold')
sns.set_style("white")
plt.show()

<h1><center>Build Models</center></h1>

K Nearest Neighbor

In [None]:
# View columns.
la_crime.columns

In [None]:
# Drop unneeded columns.
la_crime_knn = la_crime.drop(columns=['dr_no', 'date', 'time',])

In [None]:
# One-hot encode the non-numeric column(s) as integer 0/1 indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_knn = pd.get_dummies(la_crime_knn, dtype=int)

In [None]:
# View dataframe with one hot encoding.
la_crime_knn.head()

In [None]:
# Create X and y variables.
X_knn = la_crime_knn.drop(columns=['crm_cd'])
y_knn = la_crime_knn['crm_cd']

In [None]:
# Split data in trainig and testing sets.
X_train_knn, X_test_knn, y_train_knn, y_test_knn = train_test_split(X_knn, y_knn, 
                                                                    test_size=0.20, 
                                                                    random_state=75)

In [None]:
# View training and testing data dimensions.
# The second label now reads "X_test" (was "Xtest") so all four labels follow
# the same naming as the variables they describe.
print("X_train shape:\n", X_train_knn.shape)
print("\nX_test shape:\n", X_test_knn.shape)
print("\ny_train shape:\n", y_train_knn.shape)
print("\ny_test shape:\n", y_test_knn.shape)

In [None]:
# build knn model.
knn=KNeighborsClassifier(n_neighbors=20)

In [None]:
# Train the knn model
model_knn = knn.fit(X_train_knn,y_train_knn)

In [None]:
# Build the predition object.
y_pred_knn = model_knn.predict(X_test_knn)

In [None]:
# Calculate score for knn.
model_knn.score(X_test_knn, y_test_knn)

In [None]:
# Build confusion matrix.
cm = confusion_matrix(y_test_knn, y_pred_knn)
cm

In [None]:
# Print classification report.
print("Classification Report:\n", classification_report(y_test_knn, y_pred_knn))

KNN Model without categorical data.


In [None]:
# Create X and y variables.
X_knn_1 = la_crime.drop(columns=['dr_no','date','time','crm_cd', 'status_desc'])
y_knn_1 = la_crime['crm_cd']

In [None]:
# Split data into training and testing sets.
X_train_knn_1, X_test_knn_1, y_train_knn_1, y_test_knn_1 = train_test_split(X_knn_1, y_knn_1,
                                                                            test_size=0.20,
                                                                            random_state=76)

In [None]:
# Build knn model
knn_1=KNeighborsClassifier(n_neighbors=20)

In [None]:
# Train the model
model_knn_1 = knn_1.fit(X_train_knn_1,y_train_knn_1)

In [None]:
# Build prediction object.
y_pred_knn_1 = model_knn_1.predict(X_test_knn_1)

In [None]:
# Calculate model accuracy.
model_knn_1.score(X_test_knn_1, y_test_knn_1)

<h1><center>Multiple Linear Regression Model</center></h1>

In [None]:
la_crime.head()

In [None]:
# Create dummy variable for crime data.
la_crime_onehot = pd.get_dummies(la_crime).replace({True: 1, False: 0})

In [None]:
# View new dataframe with the dummy variables.
la_crime_onehot.head()

In [None]:
la_crime_onehot = la_crime_onehot.drop(columns=['dr_no', 
                                                'date',
                                                'time'], axis=1)

In [None]:
# Split the data into X and y variables.
X = la_crime_onehot.drop(columns=['crm_cd'], axis=1)
y= la_crime_onehot['crm_cd']

In [None]:
# View the X and y variables.
print("X Variables:\n", X.shape)
print("\ny Variable:\n", y.shape)

In [None]:
# Split the data into a training and testing set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 1)

In [None]:
# View training and testing data dimensions.
# The second label now reads "X_test" (was "Xtest") so all four labels follow
# the same naming as the variables they describe.
print("X_train shape:\n", X_train.shape)
print("\nX_test shape:\n", X_test.shape)
print("\ny_train shape:\n", y_train.shape)
print("\ny_test shape:\n", y_test.shape)

In [None]:
# Create linear regression model.
lm= LinearRegression()

In [None]:
# Fit the data to the model.
model = lm.fit(X_train, y_train)

In [None]:
# Create the prediction object.
y_pred = lm.predict(X_test)

In [None]:
# Uses R squared to check model accuracy. 
r2 = r2_score(y_test, y_pred)
print("R-Squared Score is:\n", r2)

Regression without categorical variables

In [None]:
# Displays first five rows.
la_crime.head()

In [None]:
# Removes unneeded columns.
la_crime = la_crime.drop(columns=['dr_no','date', 'time','status_desc'], axis=1)

In [None]:
# Split the data into X and y variables.
X_1 = la_crime.drop(columns=['crm_cd'], axis=1)
y_1= la_crime['crm_cd']

In [None]:
# Split the data into a training and testing set.
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X_1, y_1, test_size= 0.2, random_state= 50)

In [None]:
# Build linear model.
lm_1= LinearRegression()

In [None]:
# Fit the data to the model.
model_1 = lm_1.fit(X_train_1, y_train_1)

In [None]:
# Create the prediction model.
y_pred_1 = lm_1.predict(X_test_1)

In [None]:
# Check the accuracy of the model.
r2_1 = r2_score(y_test_1, y_pred_1)
print("R-Squared Score is:\n", r2_1)

<h1><center>Random Forest Classifier</center></h1>

In [None]:
# Frame for the random forest models: the one-hot encoded data built for the
# regression section (`la_crime_onehot`, which still carries the status_desc_*
# indicator columns and has dr_no/date/time removed).
# The previous `pd.get_dummies(la_crime)` result was overwritten on the very
# next line and never used, so that dead assignment is removed. `.copy()`
# gives this section its own frame with the same contents as before.
la_crime_onehot_1 = la_crime_onehot.copy()

In [None]:
# View the columns in the data.
la_crime_onehot_1.columns

In [None]:
# Split data in X and y variables.
X = la_crime_onehot_1.drop(columns=['crm_cd'])
y = la_crime_onehot_1['crm_cd']

In [None]:
# Split the data into a training and testing set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 2)

In [None]:
# Create Random Forest Classifier object.
# A fixed random_state makes the fitted forest — and therefore the reported
# accuracy and feature importances — reproducible between runs, in line with
# the other seeded random forests in this notebook.
rf = RandomForestClassifier(random_state=0)

In [None]:
# Create Random Forest Model.
rf_model = rf.fit(X_train, y_train)

In [None]:
# Predict using the Random Forest Model.
y_pred_rf = rf_model.predict(X_test)

In [None]:
# Check the accuracy of the model.
accuracy = rf_model.score(X_test, y_test)
print("Random Forest Model Accuracy is:\n", accuracy)

In [None]:
# View the classification report for random forest model.
print("Classification Report:\n", classification_report(y_test, y_pred_rf))

In [None]:
# Checks to see which features are most important.
features = pd.DataFrame({'Feature': X_train.columns, 'Importance': rf_model.feature_importances_})

In [None]:
# Views the feature importance values.
features.head(12)

<h1><center>Random Forest with hyperparameters</center></h1>

In [None]:
# Create the X and y variables
X1 = la_crime_onehot_1.drop(columns=['crm_cd', 'status_desc_Adult Arrest', 'status_desc_Adult Other',
       'status_desc_Invest Cont', 'status_desc_Juv Arrest',
       'status_desc_Juv Other', 'status_desc_UNK'])
y1 = la_crime_onehot_1['crm_cd']

In [None]:
# Split data to train and test data.
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size= 0.2, random_state= 24)

In [None]:
# Refine the Random Forest Classifier with hyperparameters.
rf_2 = RandomForestClassifier(n_estimators = 100,
                              criterion='entropy',
                              max_depth= 10,
                              min_samples_split= 2,
                              random_state = 3)

In [None]:
# Fit the data to the model.
rf_model_2 = rf_2.fit(X1_train, y1_train)

In [None]:
# Checks the accuracy of the model.
accuracy_1 = rf_model_2.score(X1_test, y1_test)
print("Refined Random Forest Model Accuracy is:\n", accuracy_1)

<h1><center>Models with Segmented Data</center></h1>

2020 Regression 


In [None]:
# Create one hot encoding on the categorical variables of the 2020 subset,
# as integer 0/1 indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2020_oneshot = pd.get_dummies(la_crime_2020, dtype=int)

In [None]:
# View first five rows.
la_crime_2020_oneshot.head()

In [None]:
# Create X and y variables.
X_2020 = la_crime_2020_oneshot.drop(columns=['dr_no', 
                                             'date', 
                                             'time',
                                             'crm_cd'], axis = 1)
y_2020 = la_crime_2020_oneshot['crm_cd']

In [None]:
# Create train and test data.
X_train_2020, X_test_2020, y_train_2020, y_test_2020 = train_test_split(X_2020, y_2020, 
                                                                        test_size= 0.20,
                                                                        random_state = 40)

In [None]:
# Create linear regression model.
lm_2020 = LinearRegression()

In [None]:
# Fit the data to the model.
model_2020 = lm_2020.fit(X_train_2020, y_train_2020)

In [None]:
# Create the prediction object.
y_pred_2020 = lm_2020.predict(X_test_2020)

In [None]:
# check accuracy of model.
lm_2020_acc = r2_score(y_test_2020, y_pred_2020)

In [None]:
# View the score of the model.
lm_2020_acc

2020 KNN

In [None]:
# Prepare dataframe for analysis: one-hot encode the 2020 subset as integer
# 0/1 indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2020_oneshot = pd.get_dummies(la_crime_2020, dtype=int)
# Same object under a second name (an alias, not a copy).
la_crime_2020_knn = la_crime_2020_oneshot

In [None]:
# Create X and y variables for KNN.
X_2020_knn = la_crime_2020_knn.drop(columns = ['dr_no',
                                               'date',
                                               'time',
                                               'crm_cd'], axis=1)
y_2020_knn = la_crime_2020_knn['crm_cd']

In [None]:
# Split data into train and test sets.
X_train_2020_knn, X_test_2020_knn, y_train_2020_knn, y_test_2020_knn = train_test_split(X_2020_knn, 
                                                                                        y_2020_knn, 
                                                                        test_size= 0.20,
                                                                        random_state = 41)

In [None]:
# Create KNN model.
knn_2020=KNeighborsClassifier()


In [None]:
# Fit the data to the model.
model_knn_2020 = knn_2020.fit(X_train_2020_knn, y_train_2020_knn)

In [None]:
# Create the prediction object.
y_pred_knn_2020 = model_knn_2020.predict(X_test_2020_knn)

In [None]:
# Check the accuracy of the model
model_knn_2020.score(X_test_2020_knn, y_test_2020_knn)

2020 Random Forest

In [None]:
# Prepare the data for analysis: one-hot encode the 2020 subset as integer
# 0/1 indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2020_oneshot = pd.get_dummies(la_crime_2020, dtype=int)
# Same object under a second name (an alias, not a copy).
la_crime_2020_rf = la_crime_2020_oneshot

In [None]:
# Split data into X and y variables.
X_2020_rf = la_crime_2020_rf.drop(columns=['dr_no',
                                           'date',
                                           'time',
                                           'crm_cd'], axis = 1)
y_2020_rf = la_crime_2020_rf['crm_cd']

In [None]:
# Split data into train and test sets.
X_train_2020_rf, X_test_2020_rf, y_train_2020_rf, y_test_2020_rf = train_test_split(X_2020_rf,
                                                                                    y_2020_rf,
                                                                                    test_size= 0.20,
                                                                                    random_state=0)

In [None]:
# Build the random forest model.
rf_2020 = RandomForestClassifier(n_estimators = 100,
                              criterion='entropy',
                              max_depth= 10,
                              min_samples_split= 2,
                              random_state = 0)

In [None]:
# Fit the data to the model.
rf_model_2020 = rf_2020.fit(X_train_2020_rf, y_train_2020_rf)

In [None]:
# Check for model accuracy.
accuracy_2020 = rf_model_2020.score(X_test_2020_rf, y_test_2020_rf)
print("Refined Random Forest Model Accuracy is:\n", accuracy_2020)

2021 Regression

In [None]:
# Prep data for analysis: one-hot encode the 2021 subset as integer 0/1
# indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2021_oneshot = pd.get_dummies(la_crime_2021, dtype=int)

In [None]:
# Split data in X and y variables.
X_2021 = la_crime_2021_oneshot.drop(columns=['dr_no', 
                                             'date', 
                                             'time',
                                             'crm_cd'], axis = 1)
y_2021 = la_crime_2021_oneshot['crm_cd']

In [None]:
# Split data into train and test sets.
X_train_2021, X_test_2021, y_train_2021, y_test_2021 = train_test_split(X_2021, y_2021, 
                                                                        test_size= 0.20,
                                                                        random_state = 100)

In [None]:
# Create linear regression model.
lm_2021 = LinearRegression()

In [None]:
# Fit the data to the model.
model_2021 = lm_2021.fit(X_train_2021, y_train_2021)


In [None]:
# Create a prediction model.
y_pred_2021 = lm_2021.predict(X_test_2021)

In [None]:
# Check for model accuracy.
lm_2021_acc = r2_score(y_test_2021, y_pred_2021)

In [None]:
# View the score for the model
lm_2021_acc

2021 K Nearest Neighbor

In [None]:
# Prep data for analysis: one-hot encode the 2021 subset as integer 0/1
# indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2021_oneshot = pd.get_dummies(la_crime_2021, dtype=int)

In [None]:
# Create X and y variables.
X_2021_knn = la_crime_2021_oneshot.drop(columns=['dr_no', 
                                             'date', 
                                             'time',
                                             'crm_cd'], axis = 1)
y_2021_knn = la_crime_2021_oneshot['crm_cd']

In [None]:
# Split data into train and test sets.
X_train_2021_knn, X_test_2021_knn, y_train_2021_knn, y_test_2021_knn = train_test_split(X_2021_knn,
                                                                                        y_2021_knn, 
                                                                                        test_size= 0.20,
                                                                                        random_state = 101)

In [None]:
# Build the KNN model.
knn_2021=KNeighborsClassifier()

In [None]:
# Fit the data to the model.
model_knn_2021 = knn_2021.fit(X_train_2021_knn, y_train_2021_knn)

In [None]:
# Create prediction object.
y_pred_knn_2021 = model_knn_2021.predict(X_test_2021_knn)

In [None]:
# Check model accuracy.
model_knn_2021.score(X_test_2021_knn, y_test_2021_knn)

2021 Random Forest

In [None]:
# Prep data for analysis: one-hot encode the 2021 subset as integer 0/1
# indicators.
# `dtype=int` yields the integers directly; the previous whole-frame
# `.replace({True: 1, False: 0})` rescanned every column and depended on dtype
# downcasting inside `replace`, which recent pandas releases deprecate.
la_crime_2021_oneshot = pd.get_dummies(la_crime_2021, dtype=int)

In [None]:
# Create X and y variables.
X_2021_rf = la_crime_2021_oneshot.drop(columns=['dr_no', 
                                             'date', 
                                             'time',
                                             'crm_cd'], axis = 1)
y_2021_rf = la_crime_2021_oneshot['crm_cd']

In [None]:
# Split data into train and test sets.
X_train_2021_rf, X_test_2021_rf, y_train_2021_rf, y_test_2021_rf = train_test_split(X_2021_rf,
                                                                                        y_2021_rf, 
                                                                                        test_size= 0.20,
                                                                                        random_state = 102)

In [None]:
# Create random forest model.
rf_2021=RandomForestClassifier(n_estimators = 100,
                              criterion='entropy',
                              max_depth= 10,
                              min_samples_split= 2,
                              random_state = 0)

In [None]:
# Fit data to the model.
rf_model_2021 = rf_2021.fit(X_train_2021_rf, y_train_2021_rf)

In [None]:
# Check the accuracy of the model.
accuracy_2021 = rf_model_2021.score(X_test_2021_rf, y_test_2021_rf)
print("Refined Random Forest Model Accuracy is:\n", accuracy_2021)