In [1]:
import pandas as pd
import pprint as pp

In [2]:
# Read Algo input csv
Algo_input = pd.read_csv("Algo_data_input.csv")
Algo_input

Unnamed: 0,living_wage,income,debt,time_to_repay,decision
0,31546,33463,17933,31.182403,No
1,33975,93355,27498,1.543617,Yes
2,22084,89911,47202,2.319725,Yes
3,31579,48767,44281,8.58758,Yes
4,29970,81802,46493,2.98998,Yes
5,23613,126377,50786,1.647334,Yes
6,26480,70467,32263,2.444889,Yes
7,32890,135635,40689,1.320064,Yes
8,28348,115271,36565,1.402199,Yes
9,34421,57350,37870,5.505401,Yes


In [3]:
target = Algo_input["decision"]
target_names = ["Yes", "No"]

In [4]:
data = Algo_input.drop(["decision","time_to_repay"], axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,living_wage,income,debt
0,31546,33463,17933
1,33975,93355,27498
2,22084,89911,47202
3,31579,48767,44281
4,29970,81802,46493


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Model Accuracy
# print('Test Acc: %.3f' % model.score(X_test, y_test))

# Calculate classification report
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

              precision    recall  f1-score   support

         Yes       0.67      0.67      0.67         3
          No       0.90      0.90      0.90        10

    accuracy                           0.85        13
   macro avg       0.78      0.78      0.78        13
weighted avg       0.85      0.85      0.85        13



In [6]:
def Classify(State,Major_Category,Debt):
    import pymongo

    # Create connection variable
    conn = 'mongodb://localhost:27017'

    # Pass connection to the pymongo instance.
    dbconn = pymongo.MongoClient(conn)

    # Connect to a database. Will create one if not already available.
    db = dbconn.FortunEd

    Major_data = db.Majors.find_one({"Major_Category":Major_Category})

    Income_dict = {}
    for data in Major_data["Possible_Occupations"]:
        Occ = data["Occupation"]
        income_split =data["Median_Occ_Salary"].split(",")
        income = ''.join(map(str, income_split))
        low_25_income_split =data['Low_25_Occ_Salary'].split(",")
        low_25_pct_income = ''.join(map(str, low_25_income_split))
        recommended_ed = data['Recommended_Education']
        high_25_income_split = data['High_25_Occ_Salary'].split(",")
        high_25_pct_income =''.join(map(str, high_25_income_split)) 
        Income_dict[Occ] ={"Occ":Occ,
                          "median_income":int(income),
                          "low_25_pct_income":int(low_25_pct_income),
                          "high_25_pct_income":int(high_25_pct_income),
                          "recommended_education":recommended_ed}
                           

    state_wages = db.StateWage

    state_wage_list = []
    state_wage_list = list(state_wages.find())
    result_values = [i[State] for i in state_wage_list if State in i]
    result =result_values[0]
    living_wage_split = result['living wage'].split("$")[1].split(",")
    wage_join =pd.to_numeric(''.join(map(str, living_wage_split)),errors='coerce')  



    output_df = pd.DataFrame(Income_dict.values())
    output_df.columns = ["Occupation","Median_Income","Low_25_pct_income","High_25_pct_income","Recommended_Education"]
    output_df["debt"] = Debt
    output_df["living_wage"] = wage_join
    output_df["major_category"] = Major_Category

    X = output_df.loc[:,["living_wage","Median_Income","debt"]]

    predictions = model.predict(X)
  
    output_df["decision"] = predictions
    outcome = output_df.to_dict("records")
    return outcome

Classify("Alaska","Agriculture & Natural Resources",50000)

[{'Occupation': 'First-Line Supervisors of Farming, Fishing, and Forestry Workers',
  'Median_Income': 48280,
  'Low_25_pct_income': 36940,
  'High_25_pct_income': 64000,
  'Recommended_Education': 'High school diploma or equivalent',
  'debt': 50000,
  'living_wage': 26528,
  'major_category': 'Agriculture & Natural Resources',
  'decision': 'Yes'},
 {'Occupation': 'Agricultural Inspectors',
  'Median_Income': 45490,
  'Low_25_pct_income': 33960,
  'High_25_pct_income': 56340,
  'Recommended_Education': "Bachelor's degree",
  'debt': 50000,
  'living_wage': 26528,
  'major_category': 'Agriculture & Natural Resources',
  'decision': 'Yes'},
 {'Occupation': 'Animal Breeders',
  'Median_Income': 42920,
  'Low_25_pct_income': 31730,
  'High_25_pct_income': 56750,
  'Recommended_Education': 'High school diploma or equivalent',
  'debt': 50000,
  'living_wage': 26528,
  'major_category': 'Agriculture & Natural Resources',
  'decision': 'Yes'},
 {'Occupation': 'Graders and Sorters, Agricultu