In [None]:
import pandas as pd

In [None]:
# Load the training CSV from the mounted Drive folder and report its (rows, columns) shape.
train = pd.read_csv("/content/drive/MyDrive/Mini project/train.csv")
print ("Train Dataset: Rows, Columns: ", train.shape)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Preview the last rows of the training frame.
train.tail()

In [None]:
print ("Summary of Train Dataset: ")
# describe() is the last expression of the cell, so its table is rendered as the cell output.
train.describe()

In [None]:
# Number of missing values in each column.
train.isnull().sum()

In [None]:
print("Top Columns having missing values")
# Per-column null counts as a one-column frame; the unnamed Series yields a column labelled 0.
missingvalues = train.isnull().sum().to_frame()
# Order the columns so the largest null counts come first.
missvaluesmissingvalues = missingvalues.sort_values(by=0, ascending=False)
missvaluesmissingvalues.head()

In [None]:
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Bar chart of how many rows fall in each poverty level.
target_counts = train['Target'].value_counts()
target = target_counts.to_frame()

# Label every Target code explicitly instead of assuming the frequency order of
# value_counts(); the code -> level pairing matches the one used by compare_plot
# (1 = Extereme, 2 = Moderate, 3 = Vulnerable, 4 = NonVulnerable).
target_labels = {1: "Extereme Poverty", 2: "Moderate Poverty", 3: "Vulnerable", 4: "NonVulnerable"}
levels = [target_labels[code] for code in target_counts.index]

# Read the counts from the Series: the column name produced by to_frame() differs
# between pandas versions, so `target.Target` is not reliable.
trace = go.Bar(y=target_counts.values, x=levels, marker=dict(color='red', opacity=0.6))
layout = dict(title="Household Poverty Levels", margin=dict(l=200), width=800, height=400)
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Last 50 values of the Target column.
train['Target'].tail(50)

In [None]:
from plotly import tools
import pandas as pd
import numpy as np
import plotly.figure_factory as ff

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
def compare_plot(col, title):
    """Build two bar traces comparing a 0/1 column across the four Target levels.

    Args:
        col: Name of a column in the global ``train`` frame whose values 0 and 1
            are counted per Target level.
        title: Accepted for call-site compatibility; not used when building the traces.

    Returns:
        A ``(not_present, present)`` tuple of ``go.Bar`` traces, each with one bar
        per Target level (1 to 4), holding the number of rows where ``col`` is 0
        and 1 respectively.
    """
    xx = ['Extereme', 'Moderate', 'Vulnerable', 'NonVulnerable']
    per_level = [train[train['Target'] == code][col].value_counts() for code in (1, 2, 3, 4)]

    # .get(..., 0) keeps a level that has no row with the given value from raising KeyError.
    absent = [int(counts.get(0, 0)) for counts in per_level]
    present = [int(counts.get(1, 0)) for counts in per_level]

    trace1 = go.Bar(y=absent, name="Not Present", x=xx, marker=dict(color="orange", opacity=0.6))
    trace2 = go.Bar(y=present, name="Present", x=xx, marker=dict(color="purple", opacity=0.6))

    return trace1, trace2

# (column, subplot title) for each owned item, in subplot order (row-major on a 3x2 grid).
owned_items = [
    ("v18q", "Tablet"),
    ("refrig", "Refrigirator"),
    ("computer", "Computer"),
    ("television", "Television"),
    ("mobilephone", "MobilePhone"),
]
titles = [label for _, label in owned_items]

fig = tools.make_subplots(rows=3, cols=2, print_grid=False, subplot_titles=titles)
for position, (column, label) in enumerate(owned_items):
    grid_row, grid_col = divmod(position, 2)
    # Each item contributes its "Not Present" trace followed by its "Present" trace.
    for bar in compare_plot(column, label):
        fig.append_trace(bar, grid_row + 1, grid_col + 1)

fig['layout'].update(height=1000, title="What do Households Own", barmode="stack", showlegend=False)
iplot(fig)

In [None]:
#Household Materials and Methods
def find_prominent(row, mats):
    """Return the first name in ``mats`` whose entry in ``row`` equals 1.

    Args:
        row: Mapping-like object indexable by the names in ``mats``.
        mats: Iterable of keys, checked in order.

    Returns:
        The first matching key, or ``None`` when no entry equals 1.
    """
    return next((name for name in mats if row[name] == 1), None)

def combine(starter, colname, title, replacemap):
    """Collapse a group of 0/1 columns into one labelled column and build per-level bars.

    Adds ``colname`` to the global ``train`` frame. For each row it holds the
    ``replacemap`` label of the first column starting with ``starter`` whose
    value is 1, or ``None`` when no such column is set.

    Args:
        starter: Prefix selecting the 0/1 columns of ``train`` to combine.
        colname: Name of the column to create (or overwrite) on ``train``.
        title: Accepted for call-site compatibility; not used by this function.
        replacemap: Dict mapping each selected column name to its display label.

    Returns:
        A list of four ``go.Bar`` traces (Target 1 to 4) with the label counts
        of ``colname`` within each Target level.
    """
    mats = [c for c in train.columns if c.startswith(starter)]
    prominent = train.apply(lambda row: find_prominent(row, mats), axis=1)
    # Identity check: rows without any flag set stay None instead of being looked up.
    train[colname] = prominent.apply(lambda x: replacemap[x] if x is not None else x)

    # (Target code, trace name, color, opacity)
    level_styles = [
        (1, "Extereme", 'red', 0.9),
        (2, "Moderate", 'red', 0.5),
        (3, "Vulnerable", 'green', 0.5),
        (4, "NonVulnerable", 'green', 0.9),
    ]
    traces = []
    for code, level_name, color, opacity in level_styles:
        # Use the value_counts Series directly; the column name that to_frame()
        # would give it is not the same across pandas versions.
        counts = train[train['Target'] == code][colname].value_counts()
        traces.append(go.Bar(y=counts.values, x=counts.index, name=level_name,
                             marker=dict(color=color, opacity=opacity)))
    return traces

titles = ["Outside Wall Material", "Floor Material", "Roof Material", "Cooking Energy Sources"]
fig = tools.make_subplots(rows=2, cols=2, print_grid=False, subplot_titles=titles)

# One entry per subplot:
# (column prefix, derived column, description, dummy-column -> label map, subplot row, subplot col)
panels = [
    ### outside material
    ("pared", "outside_material", "Predominanat Material of the External Walls",
     {'paredblolad': "Block / Brick", "paredpreb": "Cement", "paredmad": "Wood",
      "paredzocalo": "Socket", "pareddes": "Waste Material", "paredfibras": "Fibres",
      "paredother": "Other", "paredzinc": "Zink"},
     1, 1),
    ### floor material
    ("piso", "floor_material", "Floor Material of the Households",
     {'pisomoscer': "Mosaic / Ceramic", "pisocemento": "Cement", "pisonatur": "Natural Material",
      "pisonotiene": "No Floor", "pisomadera": "Wood", "pisoother": "Other"},
     1, 2),
    ### Roof Material
    ("tech", "roof_material", "Roof Material of the Households",
     {'techozinc': "Zinc", "techoentrepiso": "Fibre / Cement", "techocane": "Natural Fibre",
      "techootro": "Other"},
     2, 1),
    ### Energy Source
    ("energ", "energy_source", "Main source of energy for cooking",
     {'energcocinar1': "No Kitchen", "energcocinar2": "Electricity", "energcocinar3": "Cooking Gas",
      "energcocinar4": "Wood Charcoal"},
     2, 2),
]

for starter, colname, description, flr, grid_row, grid_col in panels:
    # combine() adds `colname` to train and returns one trace per poverty level.
    for x in combine(starter, colname, description, flr):
        fig.append_trace(x, grid_row, grid_col)

fig['layout'].update(height=900, title="Key Characteristics of Households", barmode="stack", showlegend=False)
iplot(fig)

In [None]:
#Quality of Walls, Roof, and Floor
def find_prominent2(row, mats):
    """Translate the first set quality flag into a "Bad" / "Regular" / "Good" label.

    Args:
        row: Mapping-like object indexable by the names in ``mats``.
        mats: Iterable of column names, checked in order; the last character of
            a name selects the label ("1", "2" or "3").

    Returns:
        The label of the first name whose entry equals 1 and whose name ends in
        "1", "2" or "3"; ``None`` when there is no such name.
    """
    label_by_suffix = {"1": "Bad", "2": "Regular", "3": "Good"}
    for name in mats:
        if row[name] != 1:
            continue
        label = label_by_suffix.get(name[-1:])
        if label is not None:
            return label
    return None

# Poverty levels in subplot order: (Target code, trace name, bar opacity).
level_specs = [
    (1, "Extereme", 0.99),
    (2, "Moderate", 0.69),
    (3, "Vulnerable", 0.49),
    (4, "NonVulnerable", 0.29),
]
panel_titles = ["Extereme Poverty", "Moderate Poverty", "Vulnerable", "NonVulnerable"]

# One figure per element: (derived column, dummy-column prefix, bar color, figure title).
quality_specs = [
    ("WallQuality", "epar", "red", "Wall Quality of Households"),
    ("RoofQuality", "etec", "green", "Roof Quality of Households"),
    ("FloorQuality", "eviv", "purple", "Floor Quality of Households"),
]

for quality_col, prefix, color, fig_title in quality_specs:
    # Collapse the 0/1 quality flags into a single Bad / Regular / Good column.
    flag_cols = [c for c in train.columns if c.startswith(prefix)]
    train[quality_col] = train.apply(lambda row: find_prominent2(row, flag_cols), axis=1)

    # One subplot per poverty level, each showing that level's label counts.
    fig = tools.make_subplots(rows=1, cols=4, print_grid=False, subplot_titles=panel_titles)
    for panel, (code, level_name, opacity) in enumerate(level_specs, start=1):
        counts = train[train['Target'] == code][quality_col].value_counts()
        bar = go.Bar(x=counts.index, y=counts.values,
                     marker=dict(color=color, opacity=opacity), name=level_name)
        fig.append_trace(bar, 1, panel)
    fig['layout'].update(height=250, showlegend=False, title=fig_title)
    iplot(fig)

In [None]:
# Education Details, Status, and Members
def combine2(starter, colname, title, replacemap, plotme = True):
    """Collapse a group of 0/1 columns into one labelled column and optionally plot it.

    Adds ``colname`` to the global ``train`` frame. For each row it holds the
    ``replacemap`` label of the first column starting with ``starter`` whose
    value is 1, or ``None`` when no such column is set.

    Args:
        starter: Prefix selecting the 0/1 columns of ``train`` to combine.
        colname: Name of the column to create (or overwrite) on ``train``.
        title: Title of the stacked bar chart.
        replacemap: Dict mapping each selected column name to its display label.
        plotme: When true, render the chart with ``iplot``; when false only the
            column is added.

    Returns:
        None.
    """
    mats = [c for c in train.columns if c.startswith(starter)]
    prominent = train.apply(lambda row: find_prominent(row, mats), axis=1)
    # Identity check: rows without any flag set stay None instead of being looked up.
    train[colname] = prominent.apply(lambda x: replacemap[x] if x is not None else x)

    # (Target code, trace name, color, opacity)
    level_styles = [
        (1, "Extereme", 'red', 0.9),
        (2, "Moderate", 'red', 0.5),
        (3, "Vulnerable", 'orange', 0.9),
        (4, "NonVulnerable", 'orange', 0.5),
    ]
    data = []
    for code, level_name, color, opacity in level_styles:
        # Use the value_counts Series directly; the column name that to_frame()
        # would give it is not the same across pandas versions.
        counts = train[train['Target'] == code][colname].value_counts()
        data.append(go.Bar(y=counts.values, x=counts.index, name=level_name,
                           marker=dict(color=color, opacity=opacity)))

    layout = dict(title=title, legend=dict(y=1.1, orientation="h"), barmode="stack", margin=dict(l=50), height=400)
    fig = go.Figure(data=data, layout=layout)
    if plotme:
        iplot(fig)


# Education level of each person.
flr = {
    "instlevel1": "No Education",
    "instlevel2": "Incomplete Primary",
    "instlevel3": "Complete Primary",
    "instlevel4": "Incomplete Sc.",
    "instlevel5": "Complete Sc.",
    "instlevel6": "Incomplete Tech Sc.",
    "instlevel7": "Complete Tech Sc.",
    "instlevel8": "Undergraduation",
    "instlevel9": "Postgraduation",
}
combine2("instl", "education_details", "Education Details of Family Members", flr)

# Civil status of each person.
flr = {
    "estadocivil1": "< 10 years",
    "estadocivil2": "Free / Coupled union",
    "estadocivil3": "Married",
    "estadocivil4": "Divorced",
    "estadocivil5": "Separated",
    "estadocivil6": "Widow",
    "estadocivil7": "Single",
}
combine2("estado", "status_members", "Status of Family Members", flr)

# Relationship of each person to the household head.
flr = {
    "parentesco1": "Household Head",
    "parentesco2": "Spouse/Partner",
    "parentesco3": "Son/Daughter",
    "parentesco4": "Stepson/Daughter",
    "parentesco5": "Son/Daughter in Law",
    "parentesco6": "Grandson/Daughter",
    "parentesco7": "Mother/Father",
    "parentesco8": "Mother/Father in Law",
    "parentesco9": "Brother/Sister",
    "parentesco10": "Brother/Sister in law",
    "parentesco11": "Other Family Member",
    "parentesco12": "Other Non Family Member",
}
combine2("parentesc", "family_members", "Family Members in the Households", flr)

# Region labels; plotme=False, so this call only adds the "region" column to train.
# NOTE(review): confirm these place names are the intended labels for lugar1..lugar6.
flr = {
    "lugar1": "Tumakuru",
    "lugar2": "Chikkanayakanahalli",
    "lugar3": "Gubbi",
    "lugar4": "Kunigal",
    "lugar5": "Tiptur",
    "lugar6": "Turuvekere",
}
combine2("lugar", "region", "Region of the Households", flr, plotme=False)

In [None]:
# Gender and Age Distributions
def agbr(col):
    """Build one bar trace per Target level with the value counts of ``col``.

    Args:
        col: Name of a column in the global ``train`` frame.

    Returns:
        A list of four ``go.Bar`` traces, for Target 1 to 4 in that order. Each
        has the distinct values of ``col`` on x and their row counts on y.
    """
    # (Target code, trace name, color, opacity)
    level_styles = [
        (1, "Extereme", "red", 0.89),
        (2, "Moderate", "orange", 0.79),
        (3, "Vulnerable", "purple", 0.89),
        (4, "NonVulnerable", "green", 0.79),
    ]
    traces = []
    for code, level_name, color, opacity in level_styles:
        counts = train[train['Target'] == code][col].value_counts()
        traces.append(go.Bar(x=counts.index, y=counts.values,
                             marker=dict(color=color, opacity=opacity), name=level_name))
    # The figure-building statements that used to follow the return were
    # unreachable and have been removed; callers plot the returned traces.
    return traces

# (column, subplot title) in row-major order on a 3x3 grid.
# The titles follow the meanings this notebook assigns to the same columns in its
# `features_names` list (r4h1 -> males_12_younger, r4h2 -> males_12_older,
# r4h3 -> total_males, and likewise for r4m* and r4t1/r4t2).
# NOTE(review): r4t3 is taken to be the total by analogy with r4h3 / r4m3 - confirm.
panels = [
    ("r4t1", "< 12 Yrs"), ("r4t2", ">= 12 Yrs"), ("r4t3", "Total Persons"),
    ("r4h1", "Males < 12 Yrs"), ("r4h2", "Males >= 12 Yrs"), ("r4h3", "Total Males"),
    ("r4m1", "Females < 12 Yrs"), ("r4m2", "Females >= 12 Yrs"), ("r4m3", "Total Females"),
]
titles = [label for _, label in panels]
fig = tools.make_subplots(rows=3, cols=3, print_grid=False, subplot_titles=titles)

for position, (column, _) in enumerate(panels):
    grid_row, grid_col = divmod(position, 3)
    # agbr() returns one trace per poverty level for this column.
    for x in agbr(column):
        fig.append_trace(x, grid_row + 1, grid_col + 1)

fig['layout'].update(height=750, showlegend=False, title="Gender and Age Distributions")
iplot(fig)

In [None]:
#Household Size
# One bar trace per poverty level with the value counts of the 'tamhog' column.
tm = agbr('tamhog')
layout = dict(title="Household People Size", margin=dict(l=100), height=400, legend=dict(orientation="h", y=1))
fig = go.Figure(data=tm, layout=layout)
iplot(fig)

In [None]:
# Display the training frame, which by now also carries the columns added by the cells above.
train

In [None]:
# AreaType with respect to Poverty Levels
# Label each row by area type: area1 == 1 is treated as urban, everything else as rural.
# (The label previously read "urbal", a typo that showed up in the rendered table.)
train['area_type'] = train['area1'].apply(lambda x: "urban" if x==1 else "rural")

cols = ['area_type', 'Target']
colmap = sns.light_palette("yellow", as_cmap=True)
# Rows are Target levels, columns are area types; cell shading follows the counts.
pd.crosstab(train[cols[1]], train[cols[0]]).style.background_gradient(cmap = colmap)

In [None]:
# Cross-tabulate the 'region' column (created earlier by combine2) against Target.
cols = ['region', 'Target']
colmap = sns.light_palette("orange", as_cmap=True)
# Rows are regions, columns are Target levels; cell shading follows the counts.
pd.crosstab(train[cols[0]], train[cols[1]]).style.background_gradient(cmap = colmap)

In [None]:
# Display the derived region label of every row.
train["region"]

In [None]:
# Number of rows per region label, as a one-column frame.
target1 = train['region'].value_counts().to_frame()
target1

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Start from a fresh copy of the CSV: `train` has had derived columns added by the cells above.
df = pd.read_csv("/content/drive/MyDrive/Mini project/train.csv")

# Select squared and non-squared variables
squared_vars = ["SQBmeaned", "SQBescolari", "agesq", "SQBage", "SQBhogar_total", "SQBedjefe", "SQBhogar_nin", "SQBovercrowding", "SQBdependency"]
non_squared_vars = ["meaneduc", "escolari", "age", "hogar_total", "hogar_nin", "dependency", "overcrowding"]

# Subset the DataFrame with squared and non-squared variables
subset_df = df[squared_vars + non_squared_vars]

# Compute the correlation matrix on the numeric columns only. Recent pandas
# versions raise in DataFrame.corr() when a selected column holds non-numeric
# values, where older versions silently left such columns out.
# NOTE(review): 'dependency' is the likely non-numeric column here - confirm its dtype.
corr_matrix = subset_df.select_dtypes(include="number").corr()

# Set up the figure size and style
plt.figure(figsize=(10, 8))
sns.set(font_scale=0.8)
sns.set_style("whitegrid")

# Create the heatmap
sns.heatmap(corr_matrix, annot=True, cmap=sns.cm.rocket_r, linewidths=0.5, square=True, cbar=True)

# Customize the plot
plt.title('Correlation Heatmap (Squared vs. Non-squared Variables)', fontsize=16)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Drop the squared variables. errors="ignore" keeps the cell re-runnable once
# the columns are already gone.
squared_vars = ["SQBmeaned", "SQBescolari","agesq", "SQBage", "SQBhogar_total", "SQBedjefe", "SQBhogar_nin", "SQBovercrowding", "SQBdependency"]
df = df.drop(columns=squared_vars, errors="ignore")

# Combine electricity features
electricity_features = ["noelec", "coopele", "public", "planpri"]
elec_codes = {"noelec": 1, "coopele": 2, "public": 3, "planpri": 4}
# idxmax() returns the first column when every flag in a row is 0, which would
# label that row "noelec". Detect such rows explicitly and give them code 0,
# the value the fillna(0) in the earlier version was meant to supply.
has_flag = df[electricity_features].eq(1).any(axis=1)
df["elec"] = df[electricity_features].idxmax(axis=1).map(elec_codes).where(has_flag, 0)

# Drop redundant features
redundant_features = ["female", "area2"]
df = df.drop(columns=redundant_features, errors="ignore")

In [None]:
# Display the working frame after the column changes in the previous cell.
df

In [None]:
# New ordinal column -> the three 0/1 flag columns it replaces.
quality_groups = {
    'walls': ['epared1', 'epared2', 'epared3'],
    'roof': ['etecho1', 'etecho2', 'etecho3'],
    'floor': ['eviv1', 'eviv2', 'eviv3'],
}

for new_col, flag_cols in quality_groups.items():
    # Take the name of the flag column holding the row maximum and keep its
    # trailing digit (1, 2 or 3) as an integer.
    winning_flag = df[flag_cols].idxmax(axis=1)
    df[new_col] = winning_flag.str[-1].astype(int)

# The individual flag columns are no longer needed.
replaced_flags = [flag for flag_cols in quality_groups.values() for flag in flag_cols]
df.drop(columns=replaced_flags, inplace=True)

In [None]:
# Display the working frame with the walls / roof / floor columns in place of the flag columns.
df

In [None]:
# pip install boruta

In [None]:
# from boruta import BorutaPy
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.impute import SimpleImputer
# from sklearn.preprocessing import LabelEncoder

# target = df['Target']
# df = df.drop(columns=['Target'])

# # Select the numeric features from your dataset
# numeric_features = df.select_dtypes(include=['float64', 'int64'])

# # Encode categorical variables if any
# categorical_features = df.select_dtypes(include=['object'])
# label_encoder = LabelEncoder()
# encoded_categorical_features = categorical_features.apply(label_encoder.fit_transform)

# # Concatenate the numeric and encoded categorical features
# features = pd.concat([numeric_features, encoded_categorical_features], axis=1)

# # Handle missing values
# imputer = SimpleImputer(strategy='most_frequent')  # Replace missing values with the most frequent value of each column
# features_imputed = imputer.fit_transform(features)

# # Initialize the Boruta feature selector
# boruta_selector = BorutaPy(RandomForestClassifier(), n_estimators='auto', verbose=2, random_state=1)

# # Perform feature selection
# boruta_selector.fit(features_imputed, target.values)

# # Get the selected features
# selected_features = features.columns[boruta_selector.support_].tolist()

# # Print the selected features
# print("Selected Features:")
# for feature in selected_features:
#     print(feature)

In [None]:
# pip install boruta

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Define the features and target columns
feature_columns = ['rooms', 'r4h1', 'r4h2', 'r4h3', 'r4m1', 'r4m2', 'r4m3', 'r4t1', 'r4t2', 'cielorazo', 'hogar_nin', 'overcrowding', 'escolari', 'walls', 'roof', 'floor']
# .copy() makes `features` an independent frame rather than a slice of df.
features = df[feature_columns].copy()
# df still holds the Target column, so take it from there: this avoids a second
# read of the CSV and keeps features and target on the same row index.
target = df['Target']

In [None]:
# Display the selected feature columns.
features

In [None]:
# Write the feature columns to features1.csv in the working directory, without the index.
features.to_csv('features1.csv',index=False)

In [None]:
# Read back the file written by the previous cell. The relative path matches
# the one used for writing, so this no longer depends on the working directory
# being /content.
df3 = pd.read_csv('features1.csv')
# Readable names, in the same order as the saved feature columns.
features_names = ['rooms','males_12_younger','males_12_older','total_males','females_12_younger','females_12_older','total_females','persons_12_younger','persons_12_older','ceiling','num_children','overcrowding','years_of_schooling','walls','roof','floor']
df3.columns = features_names

In [None]:
# Display the feature frame with the readable column names.
df3

In [None]:
# Display the target values.
target

In [None]:
# Number of missing values in each feature column.
df3.isnull().sum()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=1
)

# Fit a random forest on the training part and predict the held-out rows.
RFC = RandomForestClassifier(random_state=1)
RFC.fit(X_train, y_train)
y_pred_RFC = RFC.predict(X_test)

# average=None returns one score per class instead of a single aggregate.
precision = precision_score(y_test, y_pred_RFC, average=None)
recall = recall_score(y_test, y_pred_RFC, average=None)
f1 = f1_score(y_test, y_pred_RFC, average=None)

for caption, scores in (
    ("Precision for each class:", precision),
    ("Recall for each class:", recall),
    ("F1 Score for each class:", f1),
):
    print(caption, scores)

In [None]:
# First row of the held-out features, shown as a Series.
row_data = X_test.iloc[0]
row_data

In [None]:
# One hand-written sample, with values in the same order as the training columns.
new_instance_data = [6, 0, 2, 2, 1, 2,3,1, 4, 1, 1, 1.25, 8, 2, 2, 2]
# Wrap the sample in a one-row frame that carries the training column names.
# The model was fitted on a named DataFrame, so this avoids the feature-name
# warning raised for bare arrays, and the constructor fails loudly if the
# number of values does not match the number of features.
new_instance_data_2d = pd.DataFrame([new_instance_data], columns=X_train.columns)
predicted_target = RFC.predict(new_instance_data_2d)
print("Predicted Target Value: {}".format(int(predicted_target[0])))

In [None]:
# Number of values in the hand-written sample.
len(new_instance_data)

In [None]:
import pickle
import joblib
import numpy as np

# Save the model using joblib
joblib.dump(RFC, 'random_forest_classifier_model.pkl')

# Load the trained model
loaded_model = joblib.load("random_forest_classifier_model.pkl")

# Perform prediction
new_instance_data = [6, 0, 2, 2, 1, 2, 3, 1, 4, 1, 1, 1.25, 8, 2, 2, 2]
# Pass a one-row frame with the training column names: the model was fitted on
# a named DataFrame, so a bare array triggers a feature-name warning and skips
# the check that the sample has one value per feature.
new_instance_data_2d = pd.DataFrame([new_instance_data], columns=X_train.columns)
predicted_target = loaded_model.predict(new_instance_data_2d)
print("Predicted Target Value: {}".format(int(predicted_target[0])))

In [None]:
import pickle
import numpy as np

# Save the model using pickle
# NOTE: this writes to the same file name as the joblib save cell and replaces that file.
with open('random_forest_classifier_model.pkl', 'wb') as file:
    pickle.dump(RFC, file)

# Load the trained model
# The file was written a few lines above by this notebook; do not point
# pickle.load at files from untrusted sources.
with open('random_forest_classifier_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Perform prediction
new_instance_data = [6, 0, 2, 2, 1, 2, 3, 1, 4, 1, 1, 1.25, 8, 2, 2, 2]
# Pass a one-row frame with the training column names: the model was fitted on
# a named DataFrame, so a bare array triggers a feature-name warning and skips
# the check that the sample has one value per feature.
new_instance_data_2d = pd.DataFrame([new_instance_data], columns=X_train.columns)
predicted_target = loaded_model.predict(new_instance_data_2d)
print("Predicted Target Value: {}".format(int(predicted_target[0])))

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Fit a decision tree on the same split used for the random forest.
DT = DecisionTreeClassifier(random_state=1)
DT.fit(X_train, y_train)
y_pred_DT = DT.predict(X_test)
# Mean accuracy on the held-out rows.
acc_DT = DT.score(X_test, y_test)

from sklearn.metrics import precision_score, recall_score, f1_score

# average=None returns one score per class instead of a single aggregate.
precision_dtc = precision_score(y_test, y_pred_DT, average=None)
recall_dtc = recall_score(y_test, y_pred_DT, average=None)
f1_dtc = f1_score(y_test, y_pred_DT, average=None)

for caption, scores in (
    ("DTC Precision for each class:", precision_dtc),
    ("DTC Recall for each class:", recall_dtc),
    ("DTC F1 Score for each class:", f1_dtc),
):
    print(caption, scores)

In [None]:
# One hand-written sample, with values in the same order as the training columns.
new_instance_data = [6, 0, 2, 2, 1, 2,3,1, 4, 1, 1, 1.25, 8, 2, 2, 2]
# Wrap the sample in a one-row frame that carries the training column names.
# The tree was fitted on a named DataFrame, so this avoids the feature-name
# warning raised for bare arrays, and the constructor fails loudly if the
# number of values does not match the number of features.
new_instance_data_2d = pd.DataFrame([new_instance_data], columns=X_train.columns)
predicted_target = DT.predict(new_instance_data_2d)
print("Predicted Target Value: ", predicted_target[0])

In [None]:
pip install joblib

In [None]:
import joblib
# Persist the fitted random forest with joblib, under the same file name used by the earlier save cells.
joblib.dump(RFC, 'random_forest_classifier_model.pkl')