In [1]:
import gradio as gr

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.ensemble import IsolationForest
from sklearn.model_selection import cross_validate

In [2]:
def log_reg_predict(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,c_val, max_it, random_seed):
    """Train a logistic regression on train.csv and classify one example.

    The first twelve arguments are the raw attributes of the example and are
    forwarded unchanged to ``predictor_preprocessor``.

    Args:
        c_val: Value passed to ``LogisticRegression`` as ``C``.
        max_it: Maximum number of solver iterations (whole number).
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.

    Returns:
        dict mapping the ``forest_cover`` and ``forest_cover_name`` output
        components to the predicted class label and its name.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 600.0);
    # coerce them before they are used as integer hyper-parameters.
    max_it = int(max_it)
    random_seed = None if random_seed is None else int(random_seed)

    example = predictor_preprocessor(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil)

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)

    # Fix Dummy Variable Trap: drop one indicator per categorical group, and
    # drop the same columns from the example so both share one layout.
    dropped_dummies = ['Wilderness_Area4','Soil_Group4']
    training_features = training_features.drop(dropped_dummies, axis=1)
    example = example.drop(dropped_dummies, axis=1)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # The scaler is fitted on the training rows only and then reused for the example.
    min_max_scaler = preprocessing.MinMaxScaler()
    forest_data_scaled = min_max_scaler.fit_transform(forest_data)

    # Create a logistic regression model for our data using Sklearn
    logistic_regression_model = LogisticRegression(max_iter=max_it, random_state=random_seed, solver='sag', C=c_val)
    logistic_regression_model.fit(forest_data_scaled, forest_targets)

    example_scaled = min_max_scaler.transform(np.asarray(example))
    forest_cov = logistic_regression_model.predict(example_scaled)

    return {
            forest_cover: forest_cov[0],
            forest_cover_name: get_forest_cover_name(forest_cov[0])
        }

In [3]:
def knn_predict(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,num_neigh,categorical_scale_factor):
    """Train a distance-weighted k-NN on train.csv and classify one example.

    Numeric features are standardised; the one-hot wilderness / soil-group
    columns are left unscaled and multiplied by ``categorical_scale_factor``.

    Args:
        num_neigh: Number of neighbours (whole number).
        categorical_scale_factor: Multiplier applied to every categorical
            indicator column.

    Returns:
        dict mapping the ``forest_cover`` and ``forest_cover_name`` output
        components to the predicted class label and its name.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 1.0).
    num_neigh = int(num_neigh)

    numeric_columns = ['Elevation', 'Average_Hillshade', 'Sine_Of_Aspect', 'Distance_To_Hydrology', 'Horizontal_Distance_To_Fire_Points', 'Horizontal_Distance_To_Roadways', 'Slope']
    categorical_columns = ['Soil_Group1','Soil_Group2','Soil_Group3','Soil_Group4', 'Wilderness_Area1', 'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4']

    def _split_and_weight(features):
        """Return (numeric array, weighted categorical array) for a feature frame."""
        numeric = np.asarray(features.drop(categorical_columns, axis=1))
        categorical = features.drop(numeric_columns, axis=1)
        for column in categorical_columns:
            categorical[column] = categorical[column].multiply(categorical_scale_factor)
        return numeric, np.asarray(categorical)

    example = predictor_preprocessor(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil)

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)
    forest_targets = np.asarray(targets)

    train_numeric, train_categorical = _split_and_weight(training_features)

    # The scaler is fitted on the training rows only and then reused for the example.
    scaler = preprocessing.StandardScaler()
    forest_data_scaled = np.concatenate((scaler.fit_transform(train_numeric), train_categorical), axis=1)

    # Create a kNN model for our data using Sklearn
    knn_model = KNeighborsClassifier(n_neighbors=num_neigh, weights='distance', algorithm='brute')
    knn_model.fit(forest_data_scaled, forest_targets)

    # Prepare the example with exactly the same column split as the training data.
    example_numeric, example_categorical = _split_and_weight(example)
    example_processed = np.concatenate((scaler.transform(example_numeric), example_categorical), axis=1)

    forest_cov = knn_model.predict(example_processed)

    return {
            forest_cover: forest_cov[0],
            forest_cover_name: get_forest_cover_name(forest_cov[0])
        }

In [4]:
def d_tree_predict(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,criterion, max_depth, random_seed):
    """Train a decision tree on train.csv and classify one example.

    The one-hot wilderness / soil-group columns are collapsed into two integer
    label columns (values 1-4, or 0 when no indicator is set) before fitting.

    Args:
        criterion: 1 selects 'entropy'; any other value selects 'gini'.
        max_depth: Maximum tree depth (whole number); ``None`` is passed through.
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.

    Returns:
        dict mapping the ``forest_cover`` and ``forest_cover_name`` output
        components to the predicted class label and its name.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 13.0).
    max_depth = None if max_depth is None else int(max_depth)
    random_seed = None if random_seed is None else int(random_seed)

    the_c = 'entropy' if criterion == 1 else 'gini'

    def _integer_encode(features):
        """Replace the four indicators of each categorical group by one integer column."""
        encoded = features.copy()
        for prefix in ('Wilderness_Area', 'Soil_Group'):
            indicator_columns = [prefix + str(level) for level in range(1, 5)]
            # Integer labeling: indicator k contributes the value k.
            encoded[prefix] = sum(level * np.asarray(encoded[column]) for level, column in enumerate(indicator_columns, start=1))
            encoded = encoded.drop(indicator_columns, axis=1)
        return encoded

    example = _integer_encode(predictor_preprocessor(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil))

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)
    training_features = _integer_encode(training_features)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # The scaler is fitted on the training rows only and then reused for the example.
    scaler = preprocessing.StandardScaler()
    forest_data_scaled = scaler.fit_transform(forest_data)

    # Create a decision tree classifier model for our data using Sklearn
    decision_tree_model = DecisionTreeClassifier(criterion=the_c, max_depth=max_depth, random_state=random_seed)
    decision_tree_model.fit(forest_data_scaled, forest_targets)

    example_scaled = scaler.transform(np.asarray(example))
    forest_cov = decision_tree_model.predict(example_scaled)

    return {
            forest_cover: forest_cov[0],
            forest_cover_name: get_forest_cover_name(forest_cov[0])
        }

In [5]:
def rf_predict(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,num_estimators, criterion, max_depth, random_seed):
    """Train a random forest on train.csv and classify one example.

    The one-hot wilderness / soil-group columns are collapsed into two integer
    label columns (values 1-4, or 0 when no indicator is set) before fitting.

    Args:
        num_estimators: Number of trees in the forest (whole number).
        criterion: 1 selects 'entropy'; any other value selects 'gini'.
        max_depth: Maximum tree depth (whole number); ``None`` is passed through.
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.

    Returns:
        dict mapping the ``forest_cover`` and ``forest_cover_name`` output
        components to the predicted class label and its name.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 150.0).
    num_estimators = int(num_estimators)
    max_depth = None if max_depth is None else int(max_depth)
    random_seed = None if random_seed is None else int(random_seed)

    the_c = 'entropy' if criterion == 1 else 'gini'

    def _integer_encode(features):
        """Replace the four indicators of each categorical group by one integer column."""
        encoded = features.copy()
        for prefix in ('Wilderness_Area', 'Soil_Group'):
            indicator_columns = [prefix + str(level) for level in range(1, 5)]
            # Integer labeling: indicator k contributes the value k.
            encoded[prefix] = sum(level * np.asarray(encoded[column]) for level, column in enumerate(indicator_columns, start=1))
            encoded = encoded.drop(indicator_columns, axis=1)
        return encoded

    example = _integer_encode(predictor_preprocessor(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil))

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)
    training_features = _integer_encode(training_features)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # The scaler is fitted on the training rows only and then reused for the example.
    scaler = preprocessing.StandardScaler()
    forest_data_scaled = scaler.fit_transform(forest_data)

    # Create a random forest classifier model for our data using Sklearn
    rf_model = RandomForestClassifier(n_estimators=num_estimators, criterion=the_c, max_depth=max_depth, random_state=random_seed)
    rf_model.fit(forest_data_scaled, forest_targets)

    example_scaled = scaler.transform(np.asarray(example))
    forest_cov = rf_model.predict(example_scaled)

    return {
            forest_cover: forest_cov[0],
            forest_cover_name: get_forest_cover_name(forest_cov[0])
        }

In [6]:
def get_forest_cover_name(cover_type):
    """Translate a numeric cover type code into its display name.

    Codes 1 through 6 have explicit names; every other value (including 7)
    falls through to 'Krummholz'.
    """
    named_codes = (
        (1, 'Spruce/Fir'),
        (2, 'Lodgepole Pine'),
        (3, 'Ponderosa Pine'),
        (4, 'Cottonwood/Willow'),
        (5, 'Aspen'),
        (6, 'Douglas-fir'),
    )
    # Compare with == (rather than a dict lookup) so numeric types that merely
    # compare equal to the code are matched exactly as before.
    for code, label in named_codes:
        if cover_type == code:
            return label
    return 'Krummholz'

In [7]:
def predictor_preprocessor(elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil):
    """Build the one-row engineered feature frame for a single example.

    The raw attributes are reduced to: Elevation, Slope, the two remaining
    horizontal distances, four wilderness indicators, the Euclidean distance
    to hydrology, the sine of the aspect (degrees), the mean of the three
    hillshade readings, and four soil-group indicators.

    Args:
        wilderness: Wilderness area number; the matching indicator (1-4) is
            set to 1, all others to 0.
        soil: Soil type number; the indicator of the group that lists it is
            set to 1. A number listed in no group leaves all four at 0.

    Returns:
        pandas.DataFrame with exactly one row.
    """
    soil_groups = [
                    [1,2,3,4,5,6,7,8,9],
                    [10,11,12,13,14,16,17],
                    [18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33],
                    [34,35,36,37,38,39,40]
                ]

    # Hydrology distance euclidean
    hydrology = np.asarray([h_dist_hydro, v_dist_hydro])
    hydrology_distance = np.sqrt(np.square(hydrology[0]) + np.square(hydrology[1]))

    # Sine of Aspect (aspect is converted from degrees to radians first)
    aspect_sine = np.sin(np.asarray(aspect) * np.pi / 180)

    # Average Hillshade
    hillshade = np.asarray([shade_9, shade_12, shade_3])
    mean_hillshade = (hillshade[0] + hillshade[1] + hillshade[2]) / 3

    # Insertion order below is the final column order of the frame.
    row = {
        'Elevation': [elevation],
        'Slope': [slope],
        'Horizontal_Distance_To_Roadways': [h_dist_road],
        'Horizontal_Distance_To_Fire_Points': [h_dist_fire],
    }
    for area in range(1, 5):
        row['Wilderness_Area' + str(area)] = [1 if wilderness == area else 0]

    row['Distance_To_Hydrology'] = [hydrology_distance]
    row['Sine_Of_Aspect'] = [aspect_sine]
    row['Average_Hillshade'] = [mean_hillshade]

    for group_number, members in enumerate(soil_groups, start=1):
        # At most one member can equal `soil`, so this is a 0/1 indicator.
        row['Soil_Group' + str(group_number)] = [sum(1 for member in members if soil == member)]

    return pd.DataFrame(row)


In [8]:
def forest_data_preprocessor(forest_data):
    """Split a raw forest-cover frame into engineered features and targets.

    Feature engineering performed:
      * horizontal / vertical hydrology distances -> Euclidean 'Distance_To_Hydrology'
      * 'Aspect' (degrees) -> 'Sine_Of_Aspect'
      * the three hillshade readings -> their mean, 'Average_Hillshade'
      * the 40 'Soil_TypeN' indicators -> 4 'Soil_GroupN' indicator sums

    Args:
        forest_data: DataFrame holding 'Id', 'Cover_Type' and the raw feature
            columns. It is not modified. Any index is accepted.

    Returns:
        (training_features, targets): the engineered feature DataFrame and the
        'Cover_Type' column, both carrying the index of ``forest_data``.
    """
    targets = forest_data['Cover_Type']

    #Drop ID and target columns (drop() returns a new frame)
    training_features = forest_data.drop(['Cover_Type','Id'], axis=1)

    # Derived columns are assigned as plain arrays (positional). Wrapping them
    # in a fresh pd.Series would align on a new 0..n-1 index and silently yield
    # NaN whenever the input frame does not use the default index.

    #Hydrology distance euclidean
    horizontal = np.asarray(training_features['Horizontal_Distance_To_Hydrology'])
    vertical = np.asarray(training_features['Vertical_Distance_To_Hydrology'])
    training_features['Distance_To_Hydrology'] = np.sqrt(np.square(horizontal) + np.square(vertical))

    #Sine of Aspect
    training_features['Sine_Of_Aspect'] = np.sin(np.asarray(training_features['Aspect']) * np.pi / 180)

    #Average Hillshade
    hillshade_columns = ['Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm']
    hillshade = np.asarray(training_features[hillshade_columns])
    training_features['Average_Hillshade'] = (hillshade[:, 0] + hillshade[:, 1] + hillshade[:, 2]) / 3

    # NOTE(review): soil type 15 is listed in no group - confirm this is intentional.
    soil_groups = [
                    [1,2,3,4,5,6,7,8,9],
                    [10,11,12,13,14,16,17],
                    [18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33],
                    [34,35,36,37,38,39,40]
                ]

    for group_number, members in enumerate(soil_groups, start=1):
        member_columns = ['Soil_Type' + str(member) for member in members]
        training_features['Soil_Group' + str(group_number)] = np.asarray(training_features[member_columns]).sum(axis=1)

    # Drop every raw column that has been replaced by a derived one.
    replaced_columns = ['Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology', 'Aspect']
    replaced_columns += hillshade_columns
    replaced_columns += ['Soil_Type' + str(i) for i in range(1, 41)]
    training_features = training_features.drop(replaced_columns, axis=1)

    return training_features, targets


In [9]:
def perform_visualization():
    """Load train.csv, build both figures and reveal the two plot components.

    Returns:
        dict mapping the ``bar_charts`` and ``histograms`` components to
        visible ``gr.Plot`` values holding the categorical and numerical figures.
    """
    raw_frame = pd.read_csv("train.csv")
    engineered_features, cover_labels = forest_data_preprocessor(raw_frame)

    # The bar charts use the raw one-hot columns; the histograms use the
    # engineered numeric features.
    category_figure = categorical_visualization(raw_frame)
    numeric_figure = numerical_visualization(engineered_features, cover_labels)

    component_updates = {}
    component_updates[bar_charts] = gr.Plot(category_figure, visible=True)
    component_updates[histograms] = gr.Plot(numeric_figure, visible=True)
    return component_updates
    


In [10]:
def _count_by_cover_type(df, indicator_columns, cover_types=range(1, 8)):
    """Count, per indicator column, the rows equal to 1 for each cover type.

    Returns a float array of shape (len(indicator_columns), len(cover_types)).
    """
    flagged = df[indicator_columns].eq(1).astype(int)
    per_cover = flagged.groupby(df['Cover_Type']).sum()
    # Cover types absent from the data get a zero row instead of being missing.
    return per_cover.reindex(list(cover_types), fill_value=0).to_numpy(dtype=float).T


def _stacked_cover_bars(ax, positions, counts, title, xlabel, xticks):
    """Draw one stacked bar chart (one layer per cover type) on ``ax``."""
    bottom = np.zeros(len(positions))
    for cover_index in range(counts.shape[1]):
        ax.bar(positions, counts[:, cover_index], bottom=bottom)
        bottom += counts[:, cover_index]

    ax.set_xticks(xticks)
    ax.set_title(title)
    ax.legend(['1','2','3','4','5','6','7'], bbox_to_anchor=(1.025,1))
    ax.set_ylabel('Frequency')
    ax.set_xlabel(xlabel)


def categorical_visualization(df):
    """Plot stacked bar charts of cover type frequency per categorical value.

    Three charts are drawn on figure 1: by wilderness area, by soil type and
    by soil group.

    Args:
        df: Raw frame with 'Cover_Type', 'Wilderness_Area1..4' and
            'Soil_Type1..40' columns.

    Returns:
        The matplotlib figure.
    """
    # clear=True: figure number 1 is reused on every call, so wipe whatever a
    # previous call drew instead of stacking new bars on top of it.
    my_figure = plt.figure(1, figsize=(10, 10), clear=True)
    wilderness_ax, soil_ax, soil_group_ax = my_figure.subplots(3, 1)
    my_figure.subplots_adjust(hspace=1)

    soil_counts = _count_by_cover_type(df, ['Soil_Type' + str(i) for i in range(1, 41)])
    wild_counts = _count_by_cover_type(df, ['Wilderness_Area' + str(i) for i in range(1, 5)])

    soil_groups = [
                    [1,2,3,4,5,6,7,8,9],
                    [10,11,12,13,14,16,17],
                    [18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33],
                    [34,35,36,37,38,39,40]
                ]

    # Each soil group row is the sum of the rows of its member soil types.
    soil_group_counts = np.zeros((4,7))
    for group_index, members in enumerate(soil_groups):
        soil_group_counts[group_index] = soil_counts[[member - 1 for member in members]].sum(axis=0)

    _stacked_cover_bars(wilderness_ax, [1,2,3,4], wild_counts, 'Wilderness Area Stacked Bar Chart', 'Wilderness Area', [1,2,3,4])
    _stacked_cover_bars(soil_ax, range(1,41), soil_counts, 'Soil Type Stacked Bar Chart', 'Soil Type', range(1,41,2))
    _stacked_cover_bars(soil_group_ax, [1,2,3,4], soil_group_counts, 'Soil Group Stacked Bar Chart', 'Soil Group', [1,2,3,4])

    return my_figure

In [11]:
def numerical_visualization(training_features, targets):
    """Plot per-cover-type step histograms of the seven numeric features.

    Args:
        training_features: Engineered feature frame containing the columns
            listed in ``indices`` below.
        targets: Cover type labels (1-7) aligned with ``training_features``.

    Returns:
        The matplotlib figure (figure number 2).
    """
    # clear=True: figure number 2 is reused on every call, so wipe whatever a
    # previous call drew instead of drawing over it.
    my_figure = plt.figure(2, figsize=(10, 10), clear=True)
    indices = ['Elevation', 'Sine_Of_Aspect', 'Slope', 'Distance_To_Hydrology',  'Horizontal_Distance_To_Roadways', 'Average_Hillshade', 'Horizontal_Distance_To_Fire_Points']
    units = ['Meters','Sine of Aspect in Degrees Azimuth', 'Degrees', 'Distance to Nearest Surface Water Features', 'Distance to Nearest Roadway', 'Average Hillshade Index During Summer Solstice','Distance to Nearest Wildfire Ignition Points']
    cover_types = range(1, 8)

    my_figure.subplots_adjust(wspace=0.5, hspace=1)
    for position, (column, unit) in enumerate(zip(indices, units), start=1):
        ax = my_figure.add_subplot(4, 2, position)
        # One data series per cover type, all drawn on the same axes.
        per_cover_values = [training_features[column][targets == cover] for cover in cover_types]
        ax.hist(per_cover_values, histtype='step', label=[str(cover) for cover in cover_types])
        ax.set_title(column.replace('_', ' ') + " Histogram")
        ax.set_ylabel('Frequency')
        ax.set_xlabel(unit)
        ax.legend(bbox_to_anchor=(1.025,1))

    return my_figure

In [12]:
def do_logreg(c_val, max_it, random_seed, num_folds):
    """Cross-validate a logistic regression on train.csv.

    Args:
        c_val: Value passed to ``LogisticRegression`` as ``C``.
        max_it: Maximum number of solver iterations (whole number).
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.
        num_folds: Number of cross-validation folds (whole number).

    Returns:
        dict mapping the output components to the mean and sample standard
        deviation (ddof=1) of the per-fold accuracy and weighted F1 scores.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 7.0);
    # coerce them before they are used as integer parameters.
    max_it = int(max_it)
    num_folds = int(num_folds)
    random_seed = None if random_seed is None else int(random_seed)

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)

    #Fix Dummy Variable Trap
    training_features = training_features.drop(['Wilderness_Area4','Soil_Group4'], axis=1)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # NOTE: the scaler is fitted on all rows before cross-validation, so the
    # validation folds contribute to the scaling statistics.
    min_max_scaler = preprocessing.MinMaxScaler()
    forest_data_scaled = min_max_scaler.fit_transform(forest_data)

    # Create a logistic regression model for our data using Sklearn
    logistic_regression_model = LogisticRegression(max_iter=max_it, random_state=random_seed, solver='sag', C=c_val)

    cross_validator_results = cross_validate(logistic_regression_model, forest_data_scaled, forest_targets, scoring=['accuracy','f1_weighted'], cv=num_folds)

    accuracy_scores = cross_validator_results['test_accuracy']
    f1_scores = cross_validator_results['test_f1_weighted']

    return {
            output_col: gr.Column(visible=True),
            avg_accuracy: np.mean(accuracy_scores),
            avg_stdev: np.std(accuracy_scores, ddof=1),
            avg_f1: np.mean(f1_scores),
            f1_stdev: np.std(f1_scores, ddof=1),
        }

In [13]:
def do_knn(num_neigh, categorical_scale_factor, num_folds):
    """Cross-validate a distance-weighted k-NN classifier on train.csv.

    Numeric features are standardised; the one-hot wilderness / soil-group
    columns are left unscaled and multiplied by ``categorical_scale_factor``.

    Args:
        num_neigh: Number of neighbours (whole number).
        categorical_scale_factor: Multiplier applied to every categorical
            indicator column.
        num_folds: Number of cross-validation folds (whole number).

    Returns:
        dict mapping the output components to the mean and sample standard
        deviation (ddof=1) of the per-fold accuracy and weighted F1 scores.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 1.0).
    num_neigh = int(num_neigh)
    num_folds = int(num_folds)

    numeric_columns = ['Elevation', 'Average_Hillshade', 'Sine_Of_Aspect', 'Distance_To_Hydrology', 'Horizontal_Distance_To_Fire_Points', 'Horizontal_Distance_To_Roadways', 'Slope']
    categorical_columns = ['Soil_Group1','Soil_Group2','Soil_Group3','Soil_Group4', 'Wilderness_Area1', 'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4']

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)
    forest_targets = np.asarray(targets)

    categorical_part = training_features.drop(numeric_columns, axis=1)
    numeric_part = np.asarray(training_features.drop(categorical_columns, axis=1))

    # NOTE: the scaler is fitted on all rows before cross-validation, so the
    # validation folds contribute to the scaling statistics.
    scaler = preprocessing.StandardScaler()
    numeric_scaled = scaler.fit_transform(numeric_part)

    # Weight the indicator columns so they compete with the standardised numerics.
    for column in categorical_columns:
        categorical_part[column] = categorical_part[column].multiply(categorical_scale_factor)

    forest_data_scaled = np.concatenate((numeric_scaled, np.asarray(categorical_part)), axis=1)

    # Create a kNN model for our data using Sklearn
    knn_model = KNeighborsClassifier(n_neighbors=num_neigh, weights='distance', algorithm='brute')

    cross_validator_results = cross_validate(knn_model, forest_data_scaled, forest_targets, scoring=['accuracy','f1_weighted'], cv=num_folds)

    accuracy_scores = cross_validator_results['test_accuracy']
    f1_scores = cross_validator_results['test_f1_weighted']

    return {
            output_col: gr.Column(visible=True),
            avg_accuracy: np.mean(accuracy_scores),
            avg_stdev: np.std(accuracy_scores, ddof=1),
            avg_f1: np.mean(f1_scores),
            f1_stdev: np.std(f1_scores, ddof=1),
        }

In [14]:
def do_dtree(criterion, max_depth, random_seed, num_folds):
    """Cross-validate a decision tree classifier on train.csv.

    The one-hot wilderness / soil-group columns are collapsed into two integer
    label columns (values 1-4, or 0 when no indicator is set) before fitting.

    Args:
        criterion: 1 selects 'entropy'; any other value selects 'gini'.
        max_depth: Maximum tree depth (whole number); ``None`` is passed through.
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.
        num_folds: Number of cross-validation folds (whole number).

    Returns:
        dict mapping the output components to the mean and sample standard
        deviation (ddof=1) of the per-fold accuracy and weighted F1 scores.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 13.0).
    num_folds = int(num_folds)
    max_depth = None if max_depth is None else int(max_depth)
    random_seed = None if random_seed is None else int(random_seed)

    the_c = 'entropy' if criterion == 1 else 'gini'

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)

    # Integer labeling: indicator k of each group contributes the value k.
    for prefix in ('Wilderness_Area', 'Soil_Group'):
        indicator_columns = [prefix + str(level) for level in range(1, 5)]
        training_features[prefix] = sum(level * np.asarray(training_features[column]) for level, column in enumerate(indicator_columns, start=1))
        training_features = training_features.drop(indicator_columns, axis=1)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # NOTE: the scaler is fitted on all rows before cross-validation, so the
    # validation folds contribute to the scaling statistics.
    scaler = preprocessing.StandardScaler()
    forest_data_scaled = scaler.fit_transform(forest_data)

    # Create a decision tree classifier model for our data using Sklearn
    decision_tree_model = DecisionTreeClassifier(criterion=the_c, max_depth=max_depth, random_state=random_seed)

    cross_validator_results = cross_validate(decision_tree_model, forest_data_scaled, forest_targets, scoring=['accuracy','f1_weighted'], cv=num_folds)

    accuracy_scores = cross_validator_results['test_accuracy']
    f1_scores = cross_validator_results['test_f1_weighted']

    return {
            output_col: gr.Column(visible=True),
            avg_accuracy: np.mean(accuracy_scores),
            avg_stdev: np.std(accuracy_scores, ddof=1),
            avg_f1: np.mean(f1_scores),
            f1_stdev: np.std(f1_scores, ddof=1),
        }

In [15]:
def do_rf(num_estimators, criterion, max_depth, random_seed, num_folds):
    """Cross-validate a random forest classifier on train.csv.

    The one-hot wilderness / soil-group columns are collapsed into two integer
    label columns (values 1-4, or 0 when no indicator is set) before fitting.

    Args:
        num_estimators: Number of trees in the forest (whole number).
        criterion: 1 selects 'entropy'; any other value selects 'gini'.
        max_depth: Maximum tree depth (whole number); ``None`` is passed through.
        random_seed: Seed passed as ``random_state``; ``None`` is passed through.
        num_folds: Number of cross-validation folds (whole number).

    Returns:
        dict mapping the output components to the mean and sample standard
        deviation (ddof=1) of the per-fold accuracy and weighted F1 scores.
    """
    # UI number fields may hand whole numbers over as floats (e.g. 150.0).
    num_estimators = int(num_estimators)
    num_folds = int(num_folds)
    max_depth = None if max_depth is None else int(max_depth)
    random_seed = None if random_seed is None else int(random_seed)

    the_c = 'entropy' if criterion == 1 else 'gini'

    df = pd.read_csv("train.csv")
    training_features, targets = forest_data_preprocessor(df)

    # Integer labeling: indicator k of each group contributes the value k.
    for prefix in ('Wilderness_Area', 'Soil_Group'):
        indicator_columns = [prefix + str(level) for level in range(1, 5)]
        training_features[prefix] = sum(level * np.asarray(training_features[column]) for level, column in enumerate(indicator_columns, start=1))
        training_features = training_features.drop(indicator_columns, axis=1)

    forest_data = np.asarray(training_features)
    forest_targets = np.asarray(targets)

    # NOTE: the scaler is fitted on all rows before cross-validation, so the
    # validation folds contribute to the scaling statistics.
    scaler = preprocessing.StandardScaler()
    forest_data_scaled = scaler.fit_transform(forest_data)

    # Create a random forest classifier model for our data using Sklearn
    rf_model = RandomForestClassifier(n_estimators=num_estimators, criterion=the_c, max_depth=max_depth, random_state=random_seed)

    cross_validator_results = cross_validate(rf_model, forest_data_scaled, forest_targets, scoring=['accuracy','f1_weighted'], cv=num_folds)

    accuracy_scores = cross_validator_results['test_accuracy']
    f1_scores = cross_validator_results['test_f1_weighted']

    return {
            output_col: gr.Column(visible=True),
            avg_accuracy: np.mean(accuracy_scores),
            avg_stdev: np.std(accuracy_scores, ddof=1),
            avg_f1: np.mean(f1_scores),
            f1_stdev: np.std(f1_scores, ddof=1),
        }

In [16]:

# Gradio front end: a single-example predictor (top) and a data visualisation /
# cross-validation section (bottom). Number inputs that represent whole numbers
# (ids, counts, seeds, depths, folds) use precision=0 so the callbacks receive
# ints rather than floats.
with gr.Blocks() as demo:
    with gr.Column() as predictor:
        # Raw attributes of the example to classify.
        elevation = gr.Number(100,label="Elevation")
        aspect = gr.Number(0,label="Aspect")
        slope = gr.Number(0,label="Slope")
        h_dist_hydro = gr.Number(100,label="Horizontal Distance To Hydrology")
        v_dist_hydro = gr.Number(100,label="Vertical Distance To Hydrology")
        h_dist_road = gr.Number(100,label="Horizontal Distance To Roadways")
        shade_9 = gr.Number(128,label="Hillshade 9 AM (0-255)")
        shade_12 = gr.Number(128,label="Hillshade Noon (0-255)")
        shade_3 = gr.Number(128,label="Hillshade 3 PM (0-255)")
        h_dist_fire = gr.Number(100,label="Horizontal Distance To Fire Points")
        wilderness = gr.Number(1,label="Wilderness Area (1-4)", precision=0)
        soil = gr.Number(1,label="Soil Type (1-40)", precision=0)

        # One column of hyper-parameters plus a predict button per model.
        with gr.Row() as predictor_row:
            
            with gr.Column() as log_reg_p:    
                maximum_iter_p = gr.Number(600, label="Maximum Iterations", precision=0)
                seed_val_p = gr.Number(13124, label="Random Seed Value", precision=0)
                c_param_p = gr.Number(64, label="C")
                log_reg_predictor = gr.Button("Predict with Logistic Regression")
            with gr.Column() as knn_p:
                k_neigh_p = gr.Number(1, label="Number of Neighbors", precision=0)
                cat_scale_p = gr.Number(5, label="Scale for Categorical Attributes")
                knn_predictor = gr.Button("Predict with k-NN Classifier")
            with gr.Column() as d_tree_p:
                criterion_p = gr.Number(1, label="Criterion (0: GINI, 1: Entropy)", precision=0)
                seed_val2_p = gr.Number(13124, label="Random Seed Value", precision=0)
                max_depth_p = gr.Number(13, label="Maximum Depth", precision=0)
                d_tree_predictor = gr.Button("Predict with Decision Tree Classifier")
            with gr.Column() as rf_p:
                num_est_p = gr.Number(150, label="Number of Estimators", precision=0)
                criterion2_p = gr.Number(1, label="Criterion (0: GINI, 1: Entropy)", precision=0)
                seed_val3_p = gr.Number(13124, label="Random Seed Value", precision=0)
                max_depth2_p = gr.Number(19, label="Maximum Depth", precision=0)
                rf_predictor = gr.Button("Predict with Random Forest Classifier")

        with gr.Column() as predictor_output:
            forest_cover = gr.Text(label="Forest Cover Type")
            forest_cover_name = gr.Text(label="Forest Cover Type Name")
            
        
    with gr.Column() as misc_col:
    
        # The plots start hidden and are revealed by perform_visualization.
        visualizer = gr.Button("Perform Data Visualization")
        bar_charts = gr.Plot(container=True, visible=False)
        histograms = gr.Plot(container=True, visible=False)
        
        visualizer.click(fn=perform_visualization, outputs=[bar_charts,histograms])
    
        # One column of hyper-parameters plus a cross-validate button per model.
        with gr.Row() as model_row:
           
            with gr.Column() as logRegCol:
                maximum_iter = gr.Number(600, label="Maximum Iterations", precision=0)
                seed_val = gr.Number(13124, label="Random Seed Value", precision=0)
                c_param = gr.Number(64, label="C")
                tester1 = gr.Button("Cross Validate Logistic Regression")
    
            with gr.Column() as knnCol:
                k_neigh = gr.Number(1, label="Number of Neighbors", precision=0)
                cat_scale = gr.Number(5, label="Scale for Categorical Attributes")
                tester2 = gr.Button("Cross Validate k-NN Classifier")
    
            with gr.Column() as dTreeCol:
                criterion = gr.Number(1, label="Criterion (0: GINI, 1: Entropy)", precision=0)
                seed_val2 = gr.Number(13124, label="Random Seed Value", precision=0)
                max_depth = gr.Number(13, label="Maximum Depth", precision=0)
                tester3 = gr.Button("Cross Validate Decision Tree Classifier")
    
            with gr.Column() as rfCol:
                num_est = gr.Number(150, label="Number of Estimators", precision=0)
                criterion2 = gr.Number(1, label="Criterion (0: GINI, 1: Entropy)", precision=0)
                seed_val3 = gr.Number(13124, label="Random Seed Value", precision=0)
                max_depth2 = gr.Number(19, label="Maximum Depth", precision=0)
                tester4 = gr.Button("Cross Validate Random Forest Classifier")
               
        cv_num_folds = gr.Number(7, label='Number of Folds for Cross Validation', precision=0)
        
    
        with gr.Column() as output_col:
            avg_accuracy = gr.Number(label="Average Accuracy Score")
            avg_stdev = gr.Number(label="Accuracy Score Standard Deviation")
            avg_f1 = gr.Number(label="Average F1 Score")
            f1_stdev = gr.Number(label="F1 Score Standard Deviation")

    # Cross-validation buttons: the input order must match each do_* signature.
    tester1.click(fn=do_logreg, inputs=[c_param,maximum_iter,seed_val,cv_num_folds], outputs=[output_col,avg_accuracy,avg_stdev,avg_f1,f1_stdev])
    tester2.click(fn=do_knn, inputs=[k_neigh,cat_scale,cv_num_folds], outputs=[output_col,avg_accuracy,avg_stdev,avg_f1,f1_stdev])
    tester3.click(fn=do_dtree, inputs=[criterion,max_depth,seed_val2,cv_num_folds], outputs=[output_col,avg_accuracy,avg_stdev,avg_f1,f1_stdev])
    tester4.click(fn=do_rf, inputs=[num_est,criterion2,max_depth2,seed_val3,cv_num_folds], outputs=[output_col,avg_accuracy,avg_stdev,avg_f1,f1_stdev])

    # Prediction buttons: the input order must match each *_predict signature.
    knn_predictor.click(fn=knn_predict, inputs=[elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,k_neigh_p,cat_scale_p], outputs=[forest_cover,forest_cover_name])
    log_reg_predictor.click(fn=log_reg_predict, inputs=[elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,c_param_p,maximum_iter_p,seed_val_p], outputs=[forest_cover,forest_cover_name])
    d_tree_predictor.click(fn=d_tree_predict, inputs=[elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,criterion_p,max_depth_p,seed_val2_p], outputs=[forest_cover,forest_cover_name])
    rf_predictor.click(fn=rf_predict, inputs=[elevation,aspect,slope,h_dist_hydro,v_dist_hydro,h_dist_road,shade_9,shade_12,shade_3,h_dist_fire,wilderness,soil,num_est_p,criterion2_p,max_depth2_p,seed_val3_p], outputs=[forest_cover,forest_cover_name])

    
demo.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


