In [1]:
# import pyspark modules
import os
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row
from pyspark.sql import types as typ     # for datatype conversion
from pyspark.sql import functions as F  # for col() function
from pyspark.ml.linalg import DenseVector
from pyspark.ml.feature import StandardScaler
from pyspark.ml.regression import LinearRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sc = SparkContext.getOrCreate()
sqlCtx = SQLContext(sc)

# Data Import and Preprocessing

In [2]:
suicide = pd.read_csv('master.csv')

In [3]:
df = sqlCtx.createDataFrame(suicide)

In [5]:
#rename in case of name sensitivity
df = df.withColumnRenamed("suicides/100k pop", "suicide_100k_pop") \
.withColumnRenamed("gdp_per_capita ($)", "gdp_per_capita")

In [6]:
df.printSchema()

root
 |-- country: string (nullable = true)
 |-- year: long (nullable = true)
 |-- sex: string (nullable = true)
 |-- age: string (nullable = true)
 |-- suicides_no: long (nullable = true)
 |-- population: long (nullable = true)
 |-- suicide_100k_pop: double (nullable = true)
 |-- country-year: string (nullable = true)
 |-- HDI for year: double (nullable = true)
 |--  gdp_for_year ($) : string (nullable = true)
 |-- gdp_per_capita: long (nullable = true)
 |-- generation: string (nullable = true)



## feature engineering

In [7]:
import pyspark.mllib.stat as st
import numpy as np
from pyspark.sql.functions import col, asc

In [8]:
#feature transform: grouping countries into regions
Africa = ["Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cameroon","Cabo Verde", "Cape Verde",
          "Central African Republic", "Chad", "Comoros", "Côte d’Ivoire", "Democratic Republic of the Congo",
          "Equatorial Guinea", "Eritrea", "Ethiopia", "Gabon", "Gambia", "Ghana", "Guinea", "Guinea-Bissau",
          "Kenya", "Lesotho", "Liberia", "Madagascar", "Malawi", "Mali", "Mauritania", "Mauritius", "Mozambique",
          "Namibia", "Niger", "Nigeria", "Republic of the Congo", "Rwanda", "Sao Tome and Principe", "Senegal", 
          "Seychelles", "Sierra Leone", "South Africa", "Swaziland", "Togo", "Uganda", "United Republic of Tanzania",
          "Zambia", "Zimbabwe"]
Americas = ["Antigua and Barbuda", "Aruba","Argentina", "Bahamas", "Barbados", "Belize", "Bolivia (Plurinational State of)", 
            "Brazil", "Canada", "Chile", "Colombia", "Costa Rica", "Cuba", "Dominica", "Dominican Republic", "Ecuador",
            "El Salvador", "Grenada", "Guatemala", "Guyana", "Haiti", "Honduras", "Jamaica", "Mexico", "Nicaragua", "Panama",
            "Paraguay", "Peru","Puerto Rico", "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and Grenadines", "Suriname", 
            "Trinidad and Tobago", "United States", "Uruguay", "Venezuela (Bolivarian Republic of)"]
SouthEastAsia = ["Bangladesh", "Bhutan", "Democratic People’s Republic of Korea", "India", "Indonesia", "Maldives", "Myanmar",
                 "Nepal", "Sri Lanka", "Thailand", "Timor-Leste"]
European = ["Albania", "Andorra", "Armenia", "Austria", "Azerbaijan", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria",
            "Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Georgia", "Germany", "Greece", 
            "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Kazakhstan", "Kyrgyzstan", "Latvia", "Lithuania", "Luxembourg",
            "Malta", "Monaco", "Montenegro", "Netherlands", "Norway", "Poland", "Portugal", "Republic of Moldova", "Romania",
            "Russian Federation", "San Marino", "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Tajikistan",
            "The former Yugoslav Republic of Macedonia", "Turkey", "Turkmenistan", "Ukraine", "United Kingdom", "Uzbekistan"]
EasternMediterranean = ["Afghanistan", "Bahrain", "Djibouti", "Egypt", "Iran (Islamic Republic of)", "Iraq", "Jordan", 
                        "Kuwait", "Lebanon", "Libyan Arab Jamahiriya", "Morocco", "Oman", "Pakistan", "Qatar", "Saudi Arabia",
                        "Somalia", "Sudan", "Syrian Arab Republic", "Tunisia", "United Arab Emirates", "Yemen"]
WesternPacific = ["Australia", "Brunei Darussalam", "Cambodia", "China", "Cook Islands", "Fiji", "Japan", "Kiribati", 
                  "Lao People’s Democratic Republic", "Malaysia", "Marshall Islands", "Micronesia (Federated States of)", 
                  "Mongolia", "Nauru", "New Zealand", "Niue", "Palau", "Papua New Guinea", "Philippines", "Republic of Korea",
                  "Samoa", "Singapore", "Solomon Islands", "Taiwan", "Tonga", "Tuvalu", "Vanuatu", "Vietnam","Macau"]

In [11]:
df_processed = df.withColumn('region', F.when(F.col('country').isin(Africa), 'Africa')\
.when(F.col('country').isin(Americas), 'Americas') \
.when(F.col('country').isin(SouthEastAsia), 'SouthEastAsia')\
.when(F.col('country').isin(European), 'European')\
.when(F.col('country').isin(WesternPacific ), 'WesternPacific ')\
.when(F.col('country').isin(EasternMediterranean), 'EasternMediterranean'))                                                    

In [12]:
df_processed.printSchema()

root
 |-- country: string (nullable = true)
 |-- year: long (nullable = true)
 |-- sex: string (nullable = true)
 |-- age: string (nullable = true)
 |-- suicides_no: long (nullable = true)
 |-- population: long (nullable = true)
 |-- suicide_100k_pop: double (nullable = true)
 |-- country-year: string (nullable = true)
 |-- HDI for year: double (nullable = true)
 |--  gdp_for_year ($) : string (nullable = true)
 |-- gdp_per_capita: long (nullable = true)
 |-- generation: string (nullable = true)
 |-- region: string (nullable = true)



In [33]:
df_trimmed = df_processed.select(["suicide_100k_pop",
                "population",
                "year",
                "gdp_per_capita",
                "sex",                              
                "age",
                "region",                 
                "generation"])

In [34]:
df_trimmed.printSchema()

root
 |-- suicide_100k_pop: double (nullable = true)
 |-- population: long (nullable = true)
 |-- year: long (nullable = true)
 |-- gdp_per_capita: long (nullable = true)
 |-- sex: string (nullable = true)
 |-- age: string (nullable = true)
 |-- region: string (nullable = true)
 |-- generation: string (nullable = true)



In [97]:
#Create pipeline to collect categorical features for OneHotEncoder
from pyspark.ml import Pipeline
from pyspark.ml.feature import StringIndexer, OneHotEncoder, VectorAssembler

cols = ["sex", "age", "generation","region"]

indexers = [
    StringIndexer(inputCol=c, outputCol="{0}_indexed".format(c))
    for c in cols
]

encoders = [
    OneHotEncoder(
        inputCol=indexer.getOutputCol(),
        outputCol="{0}_encoded".format(indexer.getOutputCol())) 
    for indexer in indexers
]

assembler = VectorAssembler(
    inputCols=[encoder.getOutputCol() for encoder in encoders],
    outputCol="features"
)


pipeline = Pipeline(stages=indexers + encoders + [assembler])

In [98]:
allData = pipeline.fit(df_trimmed).transform(df_trimmed)

In [99]:
allData = allData.withColumn('label', F.col('suicide_100k_pop'))

In [100]:
allData.printSchema()

root
 |-- suicide_100k_pop: double (nullable = true)
 |-- population: long (nullable = true)
 |-- year: long (nullable = true)
 |-- gdp_per_capita: long (nullable = true)
 |-- sex: string (nullable = true)
 |-- age: string (nullable = true)
 |-- region: string (nullable = true)
 |-- generation: string (nullable = true)
 |-- sex_indexed: double (nullable = false)
 |-- age_indexed: double (nullable = false)
 |-- generation_indexed: double (nullable = false)
 |-- region_indexed: double (nullable = false)
 |-- sex_indexed_encoded: vector (nullable = true)
 |-- age_indexed_encoded: vector (nullable = true)
 |-- generation_indexed_encoded: vector (nullable = true)
 |-- region_indexed_encoded: vector (nullable = true)
 |-- features: vector (nullable = true)
 |-- label: double (nullable = true)



In [101]:
train_data, test_data = allData.randomSplit([0.8,0.2], seed=123)

## Model Construction

In [27]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import RandomForestRegressor
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.mllib.tree import RandomForest
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.regression import LabeledPoint
from pyspark.ml.evaluation import RegressionEvaluator

In [107]:
import random
random.seed(123)

In [62]:
import time
import pyspark
import os
import csv
from numpy import array
from pyspark.mllib.regression import LabeledPoint
from pyspark import SparkContext, SparkConf

### 1. Decision Tree Regression

In [50]:
from pyspark.ml.regression import DecisionTreeRegressor
from pyspark.ml.evaluation import RegressionEvaluator

In [108]:
# Train a DecisionTree model.
dt = DecisionTreeRegressor(labelCol="label", featuresCol="features")

In [110]:
model = dt.fit(train_Data)

In [111]:
predictions = model.transform(testData)

In [112]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_dt = evaluator.evaluate(predictions)

In [114]:
rmse_dt

15.066899570896506

### 2. Random Forest Regression

In [115]:
from pyspark.mllib.evaluation import MulticlassMetrics

In [102]:
# Train a RandomForest model.
rf = RandomForestRegressor(labelCol="label", featuresCol="features")

In [103]:
model = rf.fit(train_data)

In [104]:
predictions= model.transform(test_Data)

In [105]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_rf = evaluator.evaluate(predictions)

In [106]:
rmse_rf

15.110305013376415

### 3. Gradient-boosted tree regression

In [134]:
from pyspark.ml.regression import GBTRegressor

In [136]:
# Train a GBT model.
gbt = GBTRegressor(labelCol="label", featuresCol="features", maxIter=10)

In [137]:
model = gbt.fit(train_data)

In [138]:
predictions= model.transform(test_Data)

In [139]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_gbt = evaluator.evaluate(predictions)

In [140]:
rmse_gbt

14.583728898502146

### 4. Linear Regression

In [155]:
from pyspark.ml.regression import LinearRegression
lr = LinearRegression(featuresCol = 'features', labelCol='label', maxIter=10, regParam=0.3, elasticNetParam=0.8)
lr_model = lr.fit(train_Data)
print("Coefficients: " + str(lr_model.coefficients))
print("Intercept: " + str(lr_model.intercept))

Coefficients: [-14.308768672609885,7.221775197965496,6.425449690187302,4.511536432569709,5.7446944328984095,12.792879179310049,0.4716257591148596,4.916171735307697,-1.3395599549243613,3.0352453224821225,7.303140340813797,3.689109103801656,-3.2094413737091605,2.556675770684565,-3.2068282217833937,-9.308280255235365]
Intercept: 11.016615294972919


In [156]:
predictions= lr_model.transform(test_Data)

In [157]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_lr = evaluator.evaluate(predictions)

In [159]:
rmse_lr 

15.718056011812015

### Logistic Regression

In [154]:
from pyspark.ml.classification import LogisticRegression

In [None]:
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)

In [None]:
lrModel = lr.fit(train_data)

In [131]:
# Print the coefficients and intercept for multinomial logistic regression
print("Coefficients: \n" + str(model.coefficientMatrix))
print("Intercept: " + str(model.interceptVector))

AttributeError: 'RandomForestRegressionModel' object has no attribute 'coefficientMatrix'

In [69]:
trainingSummary = model.summary

In [70]:
# for multiclass, we can inspect metrics on a per-label basis
print("False positive rate by label:")
for i, rate in enumerate(trainingSummary.falsePositiveRateByLabel):
    print("label %d: %s" % (i, rate))

False positive rate by label:
label 0: 1.0
label 1: 0.0
label 2: 0.0
label 3: 0.0
label 4: 0.0
label 5: 0.0
label 6: 0.0
label 7: 0.0
label 8: 0.0
label 9: 0.0
label 10: 0.0
label 11: 0.0
label 12: 0.0
label 13: 0.0
label 14: 0.0
label 15: 0.0
label 16: 0.0
label 17: 0.0
label 18: 0.0
label 19: 0.0
label 20: 0.0
label 21: 0.0
label 22: 0.0
label 23: 0.0
label 24: 0.0
label 25: 0.0
label 26: 0.0
label 27: 0.0
label 28: 0.0
label 29: 0.0
label 30: 0.0
label 31: 0.0
label 32: 0.0
label 33: 0.0
label 34: 0.0
label 35: 0.0
label 36: 0.0
label 37: 0.0
label 38: 0.0
label 39: 0.0
label 40: 0.0
label 41: 0.0
label 42: 0.0
label 43: 0.0
label 44: 0.0
label 45: 0.0
label 46: 0.0
label 47: 0.0
label 48: 0.0
label 49: 0.0
label 50: 0.0
label 51: 0.0
label 52: 0.0
label 53: 0.0
label 54: 0.0
label 55: 0.0
label 56: 0.0
label 57: 0.0
label 58: 0.0
label 59: 0.0
label 60: 0.0
label 61: 0.0
label 62: 0.0
label 63: 0.0
label 64: 0.0
label 65: 0.0
label 66: 0.0
label 67: 0.0
label 68: 0.0
label 69: 0.0


label 1094: 0.0
label 1095: 0.0
label 1096: 0.0
label 1097: 0.0
label 1098: 0.0
label 1099: 0.0
label 1100: 0.0
label 1101: 0.0
label 1102: 0.0
label 1103: 0.0
label 1104: 0.0
label 1105: 0.0
label 1106: 0.0
label 1107: 0.0
label 1108: 0.0
label 1109: 0.0
label 1110: 0.0
label 1111: 0.0
label 1112: 0.0
label 1113: 0.0
label 1114: 0.0
label 1115: 0.0
label 1116: 0.0
label 1117: 0.0
label 1118: 0.0
label 1119: 0.0
label 1120: 0.0
label 1121: 0.0
label 1122: 0.0
label 1123: 0.0
label 1124: 0.0
label 1125: 0.0
label 1126: 0.0
label 1127: 0.0
label 1128: 0.0
label 1129: 0.0
label 1130: 0.0
label 1131: 0.0
label 1132: 0.0
label 1133: 0.0
label 1134: 0.0
label 1135: 0.0
label 1136: 0.0
label 1137: 0.0
label 1138: 0.0
label 1139: 0.0
label 1140: 0.0
label 1141: 0.0
label 1142: 0.0
label 1143: 0.0
label 1144: 0.0
label 1145: 0.0
label 1146: 0.0
label 1147: 0.0
label 1148: 0.0
label 1149: 0.0
label 1150: 0.0
label 1151: 0.0
label 1152: 0.0
label 1153: 0.0
label 1154: 0.0
label 1155: 0.0
label 11

label 2258: 0.0
label 2259: 0.0
label 2260: 0.0
label 2261: 0.0
label 2262: 0.0
label 2263: 0.0
label 2264: 0.0
label 2265: 0.0
label 2266: 0.0
label 2267: 0.0
label 2268: 0.0
label 2269: 0.0
label 2270: 0.0
label 2271: 0.0
label 2272: 0.0
label 2273: 0.0
label 2274: 0.0
label 2275: 0.0
label 2276: 0.0
label 2277: 0.0
label 2278: 0.0
label 2279: 0.0
label 2280: 0.0
label 2281: 0.0
label 2282: 0.0
label 2283: 0.0
label 2284: 0.0
label 2285: 0.0
label 2286: 0.0
label 2287: 0.0
label 2288: 0.0
label 2289: 0.0
label 2290: 0.0
label 2291: 0.0
label 2292: 0.0
label 2293: 0.0
label 2294: 0.0
label 2295: 0.0
label 2296: 0.0
label 2297: 0.0
label 2298: 0.0
label 2299: 0.0
label 2300: 0.0
label 2301: 0.0
label 2302: 0.0
label 2303: 0.0
label 2304: 0.0
label 2305: 0.0
label 2306: 0.0
label 2307: 0.0
label 2308: 0.0
label 2309: 0.0
label 2310: 0.0
label 2311: 0.0
label 2312: 0.0
label 2313: 0.0
label 2314: 0.0
label 2315: 0.0
label 2316: 0.0
label 2317: 0.0
label 2318: 0.0
label 2319: 0.0
label 23

label 3675: 0.0
label 3676: 0.0
label 3677: 0.0
label 3678: 0.0
label 3679: 0.0
label 3680: 0.0
label 3681: 0.0
label 3682: 0.0
label 3683: 0.0
label 3684: 0.0
label 3685: 0.0
label 3686: 0.0
label 3687: 0.0
label 3688: 0.0
label 3689: 0.0
label 3690: 0.0
label 3691: 0.0
label 3692: 0.0
label 3693: 0.0
label 3694: 0.0
label 3695: 0.0
label 3696: 0.0
label 3697: 0.0
label 3698: 0.0
label 3699: 0.0
label 3700: 0.0
label 3701: 0.0
label 3702: 0.0
label 3703: 0.0
label 3704: 0.0
label 3705: 0.0
label 3706: 0.0
label 3707: 0.0
label 3708: 0.0
label 3709: 0.0
label 3710: 0.0
label 3711: 0.0
label 3712: 0.0
label 3713: 0.0
label 3714: 0.0
label 3715: 0.0
label 3716: 0.0
label 3717: 0.0
label 3718: 0.0
label 3719: 0.0
label 3720: 0.0
label 3721: 0.0
label 3722: 0.0
label 3723: 0.0
label 3724: 0.0
label 3725: 0.0
label 3726: 0.0
label 3727: 0.0
label 3728: 0.0
label 3729: 0.0
label 3730: 0.0
label 3731: 0.0
label 3732: 0.0
label 3733: 0.0
label 3734: 0.0
label 3735: 0.0
label 3736: 0.0
label 37

In [71]:
print("True positive rate by label:")
for i, rate in enumerate(trainingSummary.truePositiveRateByLabel):
    print("label %d: %s" % (i, rate))

True positive rate by label:
label 0: 1.0
label 1: 0.0
label 2: 0.0
label 3: 0.0
label 4: 0.0
label 5: 0.0
label 6: 0.0
label 7: 0.0
label 8: 0.0
label 9: 0.0
label 10: 0.0
label 11: 0.0
label 12: 0.0
label 13: 0.0
label 14: 0.0
label 15: 0.0
label 16: 0.0
label 17: 0.0
label 18: 0.0
label 19: 0.0
label 20: 0.0
label 21: 0.0
label 22: 0.0
label 23: 0.0
label 24: 0.0
label 25: 0.0
label 26: 0.0
label 27: 0.0
label 28: 0.0
label 29: 0.0
label 30: 0.0
label 31: 0.0
label 32: 0.0
label 33: 0.0
label 34: 0.0
label 35: 0.0
label 36: 0.0
label 37: 0.0
label 38: 0.0
label 39: 0.0
label 40: 0.0
label 41: 0.0
label 42: 0.0
label 43: 0.0
label 44: 0.0
label 45: 0.0
label 46: 0.0
label 47: 0.0
label 48: 0.0
label 49: 0.0
label 50: 0.0
label 51: 0.0
label 52: 0.0
label 53: 0.0
label 54: 0.0
label 55: 0.0
label 56: 0.0
label 57: 0.0
label 58: 0.0
label 59: 0.0
label 60: 0.0
label 61: 0.0
label 62: 0.0
label 63: 0.0
label 64: 0.0
label 65: 0.0
label 66: 0.0
label 67: 0.0
label 68: 0.0
label 69: 0.0
l

label 1156: 0.0
label 1157: 0.0
label 1158: 0.0
label 1159: 0.0
label 1160: 0.0
label 1161: 0.0
label 1162: 0.0
label 1163: 0.0
label 1164: 0.0
label 1165: 0.0
label 1166: 0.0
label 1167: 0.0
label 1168: 0.0
label 1169: 0.0
label 1170: 0.0
label 1171: 0.0
label 1172: 0.0
label 1173: 0.0
label 1174: 0.0
label 1175: 0.0
label 1176: 0.0
label 1177: 0.0
label 1178: 0.0
label 1179: 0.0
label 1180: 0.0
label 1181: 0.0
label 1182: 0.0
label 1183: 0.0
label 1184: 0.0
label 1185: 0.0
label 1186: 0.0
label 1187: 0.0
label 1188: 0.0
label 1189: 0.0
label 1190: 0.0
label 1191: 0.0
label 1192: 0.0
label 1193: 0.0
label 1194: 0.0
label 1195: 0.0
label 1196: 0.0
label 1197: 0.0
label 1198: 0.0
label 1199: 0.0
label 1200: 0.0
label 1201: 0.0
label 1202: 0.0
label 1203: 0.0
label 1204: 0.0
label 1205: 0.0
label 1206: 0.0
label 1207: 0.0
label 1208: 0.0
label 1209: 0.0
label 1210: 0.0
label 1211: 0.0
label 1212: 0.0
label 1213: 0.0
label 1214: 0.0
label 1215: 0.0
label 1216: 0.0
label 1217: 0.0
label 12

label 2202: 0.0
label 2203: 0.0
label 2204: 0.0
label 2205: 0.0
label 2206: 0.0
label 2207: 0.0
label 2208: 0.0
label 2209: 0.0
label 2210: 0.0
label 2211: 0.0
label 2212: 0.0
label 2213: 0.0
label 2214: 0.0
label 2215: 0.0
label 2216: 0.0
label 2217: 0.0
label 2218: 0.0
label 2219: 0.0
label 2220: 0.0
label 2221: 0.0
label 2222: 0.0
label 2223: 0.0
label 2224: 0.0
label 2225: 0.0
label 2226: 0.0
label 2227: 0.0
label 2228: 0.0
label 2229: 0.0
label 2230: 0.0
label 2231: 0.0
label 2232: 0.0
label 2233: 0.0
label 2234: 0.0
label 2235: 0.0
label 2236: 0.0
label 2237: 0.0
label 2238: 0.0
label 2239: 0.0
label 2240: 0.0
label 2241: 0.0
label 2242: 0.0
label 2243: 0.0
label 2244: 0.0
label 2245: 0.0
label 2246: 0.0
label 2247: 0.0
label 2248: 0.0
label 2249: 0.0
label 2250: 0.0
label 2251: 0.0
label 2252: 0.0
label 2253: 0.0
label 2254: 0.0
label 2255: 0.0
label 2256: 0.0
label 2257: 0.0
label 2258: 0.0
label 2259: 0.0
label 2260: 0.0
label 2261: 0.0
label 2262: 0.0
label 2263: 0.0
label 22

label 3419: 0.0
label 3420: 0.0
label 3421: 0.0
label 3422: 0.0
label 3423: 0.0
label 3424: 0.0
label 3425: 0.0
label 3426: 0.0
label 3427: 0.0
label 3428: 0.0
label 3429: 0.0
label 3430: 0.0
label 3431: 0.0
label 3432: 0.0
label 3433: 0.0
label 3434: 0.0
label 3435: 0.0
label 3436: 0.0
label 3437: 0.0
label 3438: 0.0
label 3439: 0.0
label 3440: 0.0
label 3441: 0.0
label 3442: 0.0
label 3443: 0.0
label 3444: 0.0
label 3445: 0.0
label 3446: 0.0
label 3447: 0.0
label 3448: 0.0
label 3449: 0.0
label 3450: 0.0
label 3451: 0.0
label 3452: 0.0
label 3453: 0.0
label 3454: 0.0
label 3455: 0.0
label 3456: 0.0
label 3457: 0.0
label 3458: 0.0
label 3459: 0.0
label 3460: 0.0
label 3461: 0.0
label 3462: 0.0
label 3463: 0.0
label 3464: 0.0
label 3465: 0.0
label 3466: 0.0
label 3467: 0.0
label 3468: 0.0
label 3469: 0.0
label 3470: 0.0
label 3471: 0.0
label 3472: 0.0
label 3473: 0.0
label 3474: 0.0
label 3475: 0.0
label 3476: 0.0
label 3477: 0.0
label 3478: 0.0
label 3479: 0.0
label 3480: 0.0
label 34

In [72]:
print("Precision by label:")
for i, prec in enumerate(trainingSummary.precisionByLabel):
    print("label %d: %s" % (i, prec))

Precision by label:
label 0: 0.15386689435132342
label 1: 0.0
label 2: 0.0
label 3: 0.0
label 4: 0.0
label 5: 0.0
label 6: 0.0
label 7: 0.0
label 8: 0.0
label 9: 0.0
label 10: 0.0
label 11: 0.0
label 12: 0.0
label 13: 0.0
label 14: 0.0
label 15: 0.0
label 16: 0.0
label 17: 0.0
label 18: 0.0
label 19: 0.0
label 20: 0.0
label 21: 0.0
label 22: 0.0
label 23: 0.0
label 24: 0.0
label 25: 0.0
label 26: 0.0
label 27: 0.0
label 28: 0.0
label 29: 0.0
label 30: 0.0
label 31: 0.0
label 32: 0.0
label 33: 0.0
label 34: 0.0
label 35: 0.0
label 36: 0.0
label 37: 0.0
label 38: 0.0
label 39: 0.0
label 40: 0.0
label 41: 0.0
label 42: 0.0
label 43: 0.0
label 44: 0.0
label 45: 0.0
label 46: 0.0
label 47: 0.0
label 48: 0.0
label 49: 0.0
label 50: 0.0
label 51: 0.0
label 52: 0.0
label 53: 0.0
label 54: 0.0
label 55: 0.0
label 56: 0.0
label 57: 0.0
label 58: 0.0
label 59: 0.0
label 60: 0.0
label 61: 0.0
label 62: 0.0
label 63: 0.0
label 64: 0.0
label 65: 0.0
label 66: 0.0
label 67: 0.0
label 68: 0.0
label 69

label 852: 0.0
label 853: 0.0
label 854: 0.0
label 855: 0.0
label 856: 0.0
label 857: 0.0
label 858: 0.0
label 859: 0.0
label 860: 0.0
label 861: 0.0
label 862: 0.0
label 863: 0.0
label 864: 0.0
label 865: 0.0
label 866: 0.0
label 867: 0.0
label 868: 0.0
label 869: 0.0
label 870: 0.0
label 871: 0.0
label 872: 0.0
label 873: 0.0
label 874: 0.0
label 875: 0.0
label 876: 0.0
label 877: 0.0
label 878: 0.0
label 879: 0.0
label 880: 0.0
label 881: 0.0
label 882: 0.0
label 883: 0.0
label 884: 0.0
label 885: 0.0
label 886: 0.0
label 887: 0.0
label 888: 0.0
label 889: 0.0
label 890: 0.0
label 891: 0.0
label 892: 0.0
label 893: 0.0
label 894: 0.0
label 895: 0.0
label 896: 0.0
label 897: 0.0
label 898: 0.0
label 899: 0.0
label 900: 0.0
label 901: 0.0
label 902: 0.0
label 903: 0.0
label 904: 0.0
label 905: 0.0
label 906: 0.0
label 907: 0.0
label 908: 0.0
label 909: 0.0
label 910: 0.0
label 911: 0.0
label 912: 0.0
label 913: 0.0
label 914: 0.0
label 915: 0.0
label 916: 0.0
label 917: 0.0
label 918:

label 2080: 0.0
label 2081: 0.0
label 2082: 0.0
label 2083: 0.0
label 2084: 0.0
label 2085: 0.0
label 2086: 0.0
label 2087: 0.0
label 2088: 0.0
label 2089: 0.0
label 2090: 0.0
label 2091: 0.0
label 2092: 0.0
label 2093: 0.0
label 2094: 0.0
label 2095: 0.0
label 2096: 0.0
label 2097: 0.0
label 2098: 0.0
label 2099: 0.0
label 2100: 0.0
label 2101: 0.0
label 2102: 0.0
label 2103: 0.0
label 2104: 0.0
label 2105: 0.0
label 2106: 0.0
label 2107: 0.0
label 2108: 0.0
label 2109: 0.0
label 2110: 0.0
label 2111: 0.0
label 2112: 0.0
label 2113: 0.0
label 2114: 0.0
label 2115: 0.0
label 2116: 0.0
label 2117: 0.0
label 2118: 0.0
label 2119: 0.0
label 2120: 0.0
label 2121: 0.0
label 2122: 0.0
label 2123: 0.0
label 2124: 0.0
label 2125: 0.0
label 2126: 0.0
label 2127: 0.0
label 2128: 0.0
label 2129: 0.0
label 2130: 0.0
label 2131: 0.0
label 2132: 0.0
label 2133: 0.0
label 2134: 0.0
label 2135: 0.0
label 2136: 0.0
label 2137: 0.0
label 2138: 0.0
label 2139: 0.0
label 2140: 0.0
label 2141: 0.0
label 21

label 2739: 0.0
label 2740: 0.0
label 2741: 0.0
label 2742: 0.0
label 2743: 0.0
label 2744: 0.0
label 2745: 0.0
label 2746: 0.0
label 2747: 0.0
label 2748: 0.0
label 2749: 0.0
label 2750: 0.0
label 2751: 0.0
label 2752: 0.0
label 2753: 0.0
label 2754: 0.0
label 2755: 0.0
label 2756: 0.0
label 2757: 0.0
label 2758: 0.0
label 2759: 0.0
label 2760: 0.0
label 2761: 0.0
label 2762: 0.0
label 2763: 0.0
label 2764: 0.0
label 2765: 0.0
label 2766: 0.0
label 2767: 0.0
label 2768: 0.0
label 2769: 0.0
label 2770: 0.0
label 2771: 0.0
label 2772: 0.0
label 2773: 0.0
label 2774: 0.0
label 2775: 0.0
label 2776: 0.0
label 2777: 0.0
label 2778: 0.0
label 2779: 0.0
label 2780: 0.0
label 2781: 0.0
label 2782: 0.0
label 2783: 0.0
label 2784: 0.0
label 2785: 0.0
label 2786: 0.0
label 2787: 0.0
label 2788: 0.0
label 2789: 0.0
label 2790: 0.0
label 2791: 0.0
label 2792: 0.0
label 2793: 0.0
label 2794: 0.0
label 2795: 0.0
label 2796: 0.0
label 2797: 0.0
label 2798: 0.0
label 2799: 0.0
label 2800: 0.0
label 28

label 3628: 0.0
label 3629: 0.0
label 3630: 0.0
label 3631: 0.0
label 3632: 0.0
label 3633: 0.0
label 3634: 0.0
label 3635: 0.0
label 3636: 0.0
label 3637: 0.0
label 3638: 0.0
label 3639: 0.0
label 3640: 0.0
label 3641: 0.0
label 3642: 0.0
label 3643: 0.0
label 3644: 0.0
label 3645: 0.0
label 3646: 0.0
label 3647: 0.0
label 3648: 0.0
label 3649: 0.0
label 3650: 0.0
label 3651: 0.0
label 3652: 0.0
label 3653: 0.0
label 3654: 0.0
label 3655: 0.0
label 3656: 0.0
label 3657: 0.0
label 3658: 0.0
label 3659: 0.0
label 3660: 0.0
label 3661: 0.0
label 3662: 0.0
label 3663: 0.0
label 3664: 0.0
label 3665: 0.0
label 3666: 0.0
label 3667: 0.0
label 3668: 0.0
label 3669: 0.0
label 3670: 0.0
label 3671: 0.0
label 3672: 0.0
label 3673: 0.0
label 3674: 0.0
label 3675: 0.0
label 3676: 0.0
label 3677: 0.0
label 3678: 0.0
label 3679: 0.0
label 3680: 0.0
label 3681: 0.0
label 3682: 0.0
label 3683: 0.0
label 3684: 0.0
label 3685: 0.0
label 3686: 0.0
label 3687: 0.0
label 3688: 0.0
label 3689: 0.0
label 36

label 4745: 0.0
label 4746: 0.0
label 4747: 0.0
label 4748: 0.0
label 4749: 0.0
label 4750: 0.0
label 4751: 0.0
label 4752: 0.0
label 4753: 0.0
label 4754: 0.0
label 4755: 0.0
label 4756: 0.0
label 4757: 0.0
label 4758: 0.0
label 4759: 0.0
label 4760: 0.0
label 4761: 0.0
label 4762: 0.0
label 4763: 0.0
label 4764: 0.0
label 4765: 0.0
label 4766: 0.0
label 4767: 0.0
label 4768: 0.0
label 4769: 0.0
label 4770: 0.0
label 4771: 0.0
label 4772: 0.0
label 4773: 0.0
label 4774: 0.0
label 4775: 0.0
label 4776: 0.0
label 4777: 0.0
label 4778: 0.0
label 4779: 0.0
label 4780: 0.0
label 4781: 0.0
label 4782: 0.0
label 4783: 0.0
label 4784: 0.0
label 4785: 0.0
label 4786: 0.0
label 4787: 0.0
label 4788: 0.0
label 4789: 0.0
label 4790: 0.0
label 4791: 0.0
label 4792: 0.0
label 4793: 0.0
label 4794: 0.0
label 4795: 0.0
label 4796: 0.0
label 4797: 0.0
label 4798: 0.0
label 4799: 0.0
label 4800: 0.0
label 4801: 0.0
label 4802: 0.0
label 4803: 0.0
label 4804: 0.0
label 4805: 0.0
label 4806: 0.0
label 48

In [73]:
print("Recall by label:")
for i, rec in enumerate(trainingSummary.recallByLabel):
    print("label %d: %s" % (i, rec))

Recall by label:
label 0: 1.0
label 1: 0.0
label 2: 0.0
label 3: 0.0
label 4: 0.0
label 5: 0.0
label 6: 0.0
label 7: 0.0
label 8: 0.0
label 9: 0.0
label 10: 0.0
label 11: 0.0
label 12: 0.0
label 13: 0.0
label 14: 0.0
label 15: 0.0
label 16: 0.0
label 17: 0.0
label 18: 0.0
label 19: 0.0
label 20: 0.0
label 21: 0.0
label 22: 0.0
label 23: 0.0
label 24: 0.0
label 25: 0.0
label 26: 0.0
label 27: 0.0
label 28: 0.0
label 29: 0.0
label 30: 0.0
label 31: 0.0
label 32: 0.0
label 33: 0.0
label 34: 0.0
label 35: 0.0
label 36: 0.0
label 37: 0.0
label 38: 0.0
label 39: 0.0
label 40: 0.0
label 41: 0.0
label 42: 0.0
label 43: 0.0
label 44: 0.0
label 45: 0.0
label 46: 0.0
label 47: 0.0
label 48: 0.0
label 49: 0.0
label 50: 0.0
label 51: 0.0
label 52: 0.0
label 53: 0.0
label 54: 0.0
label 55: 0.0
label 56: 0.0
label 57: 0.0
label 58: 0.0
label 59: 0.0
label 60: 0.0
label 61: 0.0
label 62: 0.0
label 63: 0.0
label 64: 0.0
label 65: 0.0
label 66: 0.0
label 67: 0.0
label 68: 0.0
label 69: 0.0
label 70: 0.0

label 755: 0.0
label 756: 0.0
label 757: 0.0
label 758: 0.0
label 759: 0.0
label 760: 0.0
label 761: 0.0
label 762: 0.0
label 763: 0.0
label 764: 0.0
label 765: 0.0
label 766: 0.0
label 767: 0.0
label 768: 0.0
label 769: 0.0
label 770: 0.0
label 771: 0.0
label 772: 0.0
label 773: 0.0
label 774: 0.0
label 775: 0.0
label 776: 0.0
label 777: 0.0
label 778: 0.0
label 779: 0.0
label 780: 0.0
label 781: 0.0
label 782: 0.0
label 783: 0.0
label 784: 0.0
label 785: 0.0
label 786: 0.0
label 787: 0.0
label 788: 0.0
label 789: 0.0
label 790: 0.0
label 791: 0.0
label 792: 0.0
label 793: 0.0
label 794: 0.0
label 795: 0.0
label 796: 0.0
label 797: 0.0
label 798: 0.0
label 799: 0.0
label 800: 0.0
label 801: 0.0
label 802: 0.0
label 803: 0.0
label 804: 0.0
label 805: 0.0
label 806: 0.0
label 807: 0.0
label 808: 0.0
label 809: 0.0
label 810: 0.0
label 811: 0.0
label 812: 0.0
label 813: 0.0
label 814: 0.0
label 815: 0.0
label 816: 0.0
label 817: 0.0
label 818: 0.0
label 819: 0.0
label 820: 0.0
label 821:

label 1458: 0.0
label 1459: 0.0
label 1460: 0.0
label 1461: 0.0
label 1462: 0.0
label 1463: 0.0
label 1464: 0.0
label 1465: 0.0
label 1466: 0.0
label 1467: 0.0
label 1468: 0.0
label 1469: 0.0
label 1470: 0.0
label 1471: 0.0
label 1472: 0.0
label 1473: 0.0
label 1474: 0.0
label 1475: 0.0
label 1476: 0.0
label 1477: 0.0
label 1478: 0.0
label 1479: 0.0
label 1480: 0.0
label 1481: 0.0
label 1482: 0.0
label 1483: 0.0
label 1484: 0.0
label 1485: 0.0
label 1486: 0.0
label 1487: 0.0
label 1488: 0.0
label 1489: 0.0
label 1490: 0.0
label 1491: 0.0
label 1492: 0.0
label 1493: 0.0
label 1494: 0.0
label 1495: 0.0
label 1496: 0.0
label 1497: 0.0
label 1498: 0.0
label 1499: 0.0
label 1500: 0.0
label 1501: 0.0
label 1502: 0.0
label 1503: 0.0
label 1504: 0.0
label 1505: 0.0
label 1506: 0.0
label 1507: 0.0
label 1508: 0.0
label 1509: 0.0
label 1510: 0.0
label 1511: 0.0
label 1512: 0.0
label 1513: 0.0
label 1514: 0.0
label 1515: 0.0
label 1516: 0.0
label 1517: 0.0
label 1518: 0.0
label 1519: 0.0
label 15

label 2040: 0.0
label 2041: 0.0
label 2042: 0.0
label 2043: 0.0
label 2044: 0.0
label 2045: 0.0
label 2046: 0.0
label 2047: 0.0
label 2048: 0.0
label 2049: 0.0
label 2050: 0.0
label 2051: 0.0
label 2052: 0.0
label 2053: 0.0
label 2054: 0.0
label 2055: 0.0
label 2056: 0.0
label 2057: 0.0
label 2058: 0.0
label 2059: 0.0
label 2060: 0.0
label 2061: 0.0
label 2062: 0.0
label 2063: 0.0
label 2064: 0.0
label 2065: 0.0
label 2066: 0.0
label 2067: 0.0
label 2068: 0.0
label 2069: 0.0
label 2070: 0.0
label 2071: 0.0
label 2072: 0.0
label 2073: 0.0
label 2074: 0.0
label 2075: 0.0
label 2076: 0.0
label 2077: 0.0
label 2078: 0.0
label 2079: 0.0
label 2080: 0.0
label 2081: 0.0
label 2082: 0.0
label 2083: 0.0
label 2084: 0.0
label 2085: 0.0
label 2086: 0.0
label 2087: 0.0
label 2088: 0.0
label 2089: 0.0
label 2090: 0.0
label 2091: 0.0
label 2092: 0.0
label 2093: 0.0
label 2094: 0.0
label 2095: 0.0
label 2096: 0.0
label 2097: 0.0
label 2098: 0.0
label 2099: 0.0
label 2100: 0.0
label 2101: 0.0
label 21

label 2564: 0.0
label 2565: 0.0
label 2566: 0.0
label 2567: 0.0
label 2568: 0.0
label 2569: 0.0
label 2570: 0.0
label 2571: 0.0
label 2572: 0.0
label 2573: 0.0
label 2574: 0.0
label 2575: 0.0
label 2576: 0.0
label 2577: 0.0
label 2578: 0.0
label 2579: 0.0
label 2580: 0.0
label 2581: 0.0
label 2582: 0.0
label 2583: 0.0
label 2584: 0.0
label 2585: 0.0
label 2586: 0.0
label 2587: 0.0
label 2588: 0.0
label 2589: 0.0
label 2590: 0.0
label 2591: 0.0
label 2592: 0.0
label 2593: 0.0
label 2594: 0.0
label 2595: 0.0
label 2596: 0.0
label 2597: 0.0
label 2598: 0.0
label 2599: 0.0
label 2600: 0.0
label 2601: 0.0
label 2602: 0.0
label 2603: 0.0
label 2604: 0.0
label 2605: 0.0
label 2606: 0.0
label 2607: 0.0
label 2608: 0.0
label 2609: 0.0
label 2610: 0.0
label 2611: 0.0
label 2612: 0.0
label 2613: 0.0
label 2614: 0.0
label 2615: 0.0
label 2616: 0.0
label 2617: 0.0
label 2618: 0.0
label 2619: 0.0
label 2620: 0.0
label 2621: 0.0
label 2622: 0.0
label 2623: 0.0
label 2624: 0.0
label 2625: 0.0
label 26

label 3406: 0.0
label 3407: 0.0
label 3408: 0.0
label 3409: 0.0
label 3410: 0.0
label 3411: 0.0
label 3412: 0.0
label 3413: 0.0
label 3414: 0.0
label 3415: 0.0
label 3416: 0.0
label 3417: 0.0
label 3418: 0.0
label 3419: 0.0
label 3420: 0.0
label 3421: 0.0
label 3422: 0.0
label 3423: 0.0
label 3424: 0.0
label 3425: 0.0
label 3426: 0.0
label 3427: 0.0
label 3428: 0.0
label 3429: 0.0
label 3430: 0.0
label 3431: 0.0
label 3432: 0.0
label 3433: 0.0
label 3434: 0.0
label 3435: 0.0
label 3436: 0.0
label 3437: 0.0
label 3438: 0.0
label 3439: 0.0
label 3440: 0.0
label 3441: 0.0
label 3442: 0.0
label 3443: 0.0
label 3444: 0.0
label 3445: 0.0
label 3446: 0.0
label 3447: 0.0
label 3448: 0.0
label 3449: 0.0
label 3450: 0.0
label 3451: 0.0
label 3452: 0.0
label 3453: 0.0
label 3454: 0.0
label 3455: 0.0
label 3456: 0.0
label 3457: 0.0
label 3458: 0.0
label 3459: 0.0
label 3460: 0.0
label 3461: 0.0
label 3462: 0.0
label 3463: 0.0
label 3464: 0.0
label 3465: 0.0
label 3466: 0.0
label 3467: 0.0
label 34

label 3967: 0.0
label 3968: 0.0
label 3969: 0.0
label 3970: 0.0
label 3971: 0.0
label 3972: 0.0
label 3973: 0.0
label 3974: 0.0
label 3975: 0.0
label 3976: 0.0
label 3977: 0.0
label 3978: 0.0
label 3979: 0.0
label 3980: 0.0
label 3981: 0.0
label 3982: 0.0
label 3983: 0.0
label 3984: 0.0
label 3985: 0.0
label 3986: 0.0
label 3987: 0.0
label 3988: 0.0
label 3989: 0.0
label 3990: 0.0
label 3991: 0.0
label 3992: 0.0
label 3993: 0.0
label 3994: 0.0
label 3995: 0.0
label 3996: 0.0
label 3997: 0.0
label 3998: 0.0
label 3999: 0.0
label 4000: 0.0
label 4001: 0.0
label 4002: 0.0
label 4003: 0.0
label 4004: 0.0
label 4005: 0.0
label 4006: 0.0
label 4007: 0.0
label 4008: 0.0
label 4009: 0.0
label 4010: 0.0
label 4011: 0.0
label 4012: 0.0
label 4013: 0.0
label 4014: 0.0
label 4015: 0.0
label 4016: 0.0
label 4017: 0.0
label 4018: 0.0
label 4019: 0.0
label 4020: 0.0
label 4021: 0.0
label 4022: 0.0
label 4023: 0.0
label 4024: 0.0
label 4025: 0.0
label 4026: 0.0
label 4027: 0.0
label 4028: 0.0
label 40

In [74]:
print("F-measure by label:")
for i, f in enumerate(trainingSummary.fMeasureByLabel()):
    print("label %d: %s" % (i, f))

F-measure by label:
label 0: 0.266697822954395
label 1: 0.0
label 2: 0.0
label 3: 0.0
label 4: 0.0
label 5: 0.0
label 6: 0.0
label 7: 0.0
label 8: 0.0
label 9: 0.0
label 10: 0.0
label 11: 0.0
label 12: 0.0
label 13: 0.0
label 14: 0.0
label 15: 0.0
label 16: 0.0
label 17: 0.0
label 18: 0.0
label 19: 0.0
label 20: 0.0
label 21: 0.0
label 22: 0.0
label 23: 0.0
label 24: 0.0
label 25: 0.0
label 26: 0.0
label 27: 0.0
label 28: 0.0
label 29: 0.0
label 30: 0.0
label 31: 0.0
label 32: 0.0
label 33: 0.0
label 34: 0.0
label 35: 0.0
label 36: 0.0
label 37: 0.0
label 38: 0.0
label 39: 0.0
label 40: 0.0
label 41: 0.0
label 42: 0.0
label 43: 0.0
label 44: 0.0
label 45: 0.0
label 46: 0.0
label 47: 0.0
label 48: 0.0
label 49: 0.0
label 50: 0.0
label 51: 0.0
label 52: 0.0
label 53: 0.0
label 54: 0.0
label 55: 0.0
label 56: 0.0
label 57: 0.0
label 58: 0.0
label 59: 0.0
label 60: 0.0
label 61: 0.0
label 62: 0.0
label 63: 0.0
label 64: 0.0
label 65: 0.0
label 66: 0.0
label 67: 0.0
label 68: 0.0
label 69: 

label 1034: 0.0
label 1035: 0.0
label 1036: 0.0
label 1037: 0.0
label 1038: 0.0
label 1039: 0.0
label 1040: 0.0
label 1041: 0.0
label 1042: 0.0
label 1043: 0.0
label 1044: 0.0
label 1045: 0.0
label 1046: 0.0
label 1047: 0.0
label 1048: 0.0
label 1049: 0.0
label 1050: 0.0
label 1051: 0.0
label 1052: 0.0
label 1053: 0.0
label 1054: 0.0
label 1055: 0.0
label 1056: 0.0
label 1057: 0.0
label 1058: 0.0
label 1059: 0.0
label 1060: 0.0
label 1061: 0.0
label 1062: 0.0
label 1063: 0.0
label 1064: 0.0
label 1065: 0.0
label 1066: 0.0
label 1067: 0.0
label 1068: 0.0
label 1069: 0.0
label 1070: 0.0
label 1071: 0.0
label 1072: 0.0
label 1073: 0.0
label 1074: 0.0
label 1075: 0.0
label 1076: 0.0
label 1077: 0.0
label 1078: 0.0
label 1079: 0.0
label 1080: 0.0
label 1081: 0.0
label 1082: 0.0
label 1083: 0.0
label 1084: 0.0
label 1085: 0.0
label 1086: 0.0
label 1087: 0.0
label 1088: 0.0
label 1089: 0.0
label 1090: 0.0
label 1091: 0.0
label 1092: 0.0
label 1093: 0.0
label 1094: 0.0
label 1095: 0.0
label 10

label 1619: 0.0
label 1620: 0.0
label 1621: 0.0
label 1622: 0.0
label 1623: 0.0
label 1624: 0.0
label 1625: 0.0
label 1626: 0.0
label 1627: 0.0
label 1628: 0.0
label 1629: 0.0
label 1630: 0.0
label 1631: 0.0
label 1632: 0.0
label 1633: 0.0
label 1634: 0.0
label 1635: 0.0
label 1636: 0.0
label 1637: 0.0
label 1638: 0.0
label 1639: 0.0
label 1640: 0.0
label 1641: 0.0
label 1642: 0.0
label 1643: 0.0
label 1644: 0.0
label 1645: 0.0
label 1646: 0.0
label 1647: 0.0
label 1648: 0.0
label 1649: 0.0
label 1650: 0.0
label 1651: 0.0
label 1652: 0.0
label 1653: 0.0
label 1654: 0.0
label 1655: 0.0
label 1656: 0.0
label 1657: 0.0
label 1658: 0.0
label 1659: 0.0
label 1660: 0.0
label 1661: 0.0
label 1662: 0.0
label 1663: 0.0
label 1664: 0.0
label 1665: 0.0
label 1666: 0.0
label 1667: 0.0
label 1668: 0.0
label 1669: 0.0
label 1670: 0.0
label 1671: 0.0
label 1672: 0.0
label 1673: 0.0
label 1674: 0.0
label 1675: 0.0
label 1676: 0.0
label 1677: 0.0
label 1678: 0.0
label 1679: 0.0
label 1680: 0.0
label 16

label 2277: 0.0
label 2278: 0.0
label 2279: 0.0
label 2280: 0.0
label 2281: 0.0
label 2282: 0.0
label 2283: 0.0
label 2284: 0.0
label 2285: 0.0
label 2286: 0.0
label 2287: 0.0
label 2288: 0.0
label 2289: 0.0
label 2290: 0.0
label 2291: 0.0
label 2292: 0.0
label 2293: 0.0
label 2294: 0.0
label 2295: 0.0
label 2296: 0.0
label 2297: 0.0
label 2298: 0.0
label 2299: 0.0
label 2300: 0.0
label 2301: 0.0
label 2302: 0.0
label 2303: 0.0
label 2304: 0.0
label 2305: 0.0
label 2306: 0.0
label 2307: 0.0
label 2308: 0.0
label 2309: 0.0
label 2310: 0.0
label 2311: 0.0
label 2312: 0.0
label 2313: 0.0
label 2314: 0.0
label 2315: 0.0
label 2316: 0.0
label 2317: 0.0
label 2318: 0.0
label 2319: 0.0
label 2320: 0.0
label 2321: 0.0
label 2322: 0.0
label 2323: 0.0
label 2324: 0.0
label 2325: 0.0
label 2326: 0.0
label 2327: 0.0
label 2328: 0.0
label 2329: 0.0
label 2330: 0.0
label 2331: 0.0
label 2332: 0.0
label 2333: 0.0
label 2334: 0.0
label 2335: 0.0
label 2336: 0.0
label 2337: 0.0
label 2338: 0.0
label 23

label 2893: 0.0
label 2894: 0.0
label 2895: 0.0
label 2896: 0.0
label 2897: 0.0
label 2898: 0.0
label 2899: 0.0
label 2900: 0.0
label 2901: 0.0
label 2902: 0.0
label 2903: 0.0
label 2904: 0.0
label 2905: 0.0
label 2906: 0.0
label 2907: 0.0
label 2908: 0.0
label 2909: 0.0
label 2910: 0.0
label 2911: 0.0
label 2912: 0.0
label 2913: 0.0
label 2914: 0.0
label 2915: 0.0
label 2916: 0.0
label 2917: 0.0
label 2918: 0.0
label 2919: 0.0
label 2920: 0.0
label 2921: 0.0
label 2922: 0.0
label 2923: 0.0
label 2924: 0.0
label 2925: 0.0
label 2926: 0.0
label 2927: 0.0
label 2928: 0.0
label 2929: 0.0
label 2930: 0.0
label 2931: 0.0
label 2932: 0.0
label 2933: 0.0
label 2934: 0.0
label 2935: 0.0
label 2936: 0.0
label 2937: 0.0
label 2938: 0.0
label 2939: 0.0
label 2940: 0.0
label 2941: 0.0
label 2942: 0.0
label 2943: 0.0
label 2944: 0.0
label 2945: 0.0
label 2946: 0.0
label 2947: 0.0
label 2948: 0.0
label 2949: 0.0
label 2950: 0.0
label 2951: 0.0
label 2952: 0.0
label 2953: 0.0
label 2954: 0.0
label 29

label 3804: 0.0
label 3805: 0.0
label 3806: 0.0
label 3807: 0.0
label 3808: 0.0
label 3809: 0.0
label 3810: 0.0
label 3811: 0.0
label 3812: 0.0
label 3813: 0.0
label 3814: 0.0
label 3815: 0.0
label 3816: 0.0
label 3817: 0.0
label 3818: 0.0
label 3819: 0.0
label 3820: 0.0
label 3821: 0.0
label 3822: 0.0
label 3823: 0.0
label 3824: 0.0
label 3825: 0.0
label 3826: 0.0
label 3827: 0.0
label 3828: 0.0
label 3829: 0.0
label 3830: 0.0
label 3831: 0.0
label 3832: 0.0
label 3833: 0.0
label 3834: 0.0
label 3835: 0.0
label 3836: 0.0
label 3837: 0.0
label 3838: 0.0
label 3839: 0.0
label 3840: 0.0
label 3841: 0.0
label 3842: 0.0
label 3843: 0.0
label 3844: 0.0
label 3845: 0.0
label 3846: 0.0
label 3847: 0.0
label 3848: 0.0
label 3849: 0.0
label 3850: 0.0
label 3851: 0.0
label 3852: 0.0
label 3853: 0.0
label 3854: 0.0
label 3855: 0.0
label 3856: 0.0
label 3857: 0.0
label 3858: 0.0
label 3859: 0.0
label 3860: 0.0
label 3861: 0.0
label 3862: 0.0
label 3863: 0.0
label 3864: 0.0
label 3865: 0.0
label 38

label 4321: 0.0
label 4322: 0.0
label 4323: 0.0
label 4324: 0.0
label 4325: 0.0
label 4326: 0.0
label 4327: 0.0
label 4328: 0.0
label 4329: 0.0
label 4330: 0.0
label 4331: 0.0
label 4332: 0.0
label 4333: 0.0
label 4334: 0.0
label 4335: 0.0
label 4336: 0.0
label 4337: 0.0
label 4338: 0.0
label 4339: 0.0
label 4340: 0.0
label 4341: 0.0
label 4342: 0.0
label 4343: 0.0
label 4344: 0.0
label 4345: 0.0
label 4346: 0.0
label 4347: 0.0
label 4348: 0.0
label 4349: 0.0
label 4350: 0.0
label 4351: 0.0
label 4352: 0.0
label 4353: 0.0
label 4354: 0.0
label 4355: 0.0
label 4356: 0.0
label 4357: 0.0
label 4358: 0.0
label 4359: 0.0
label 4360: 0.0
label 4361: 0.0
label 4362: 0.0
label 4363: 0.0
label 4364: 0.0
label 4365: 0.0
label 4366: 0.0
label 4367: 0.0
label 4368: 0.0
label 4369: 0.0
label 4370: 0.0
label 4371: 0.0
label 4372: 0.0
label 4373: 0.0
label 4374: 0.0
label 4375: 0.0
label 4376: 0.0
label 4377: 0.0
label 4378: 0.0
label 4379: 0.0
label 4380: 0.0
label 4381: 0.0
label 4382: 0.0
label 43

In [75]:
accuracy = trainingSummary.accuracy

In [76]:
falsePositiveRate = trainingSummary.weightedFalsePositiveRate

In [77]:
truePositiveRate = trainingSummary.weightedTruePositiveRate

In [78]:
fMeasure = trainingSummary.weightedFMeasure()

In [79]:
precision = trainingSummary.weightedPrecision

In [80]:
recall = trainingSummary.weightedRecall

In [81]:
print("Accuracy: %s\nFPR: %s\nTPR: %s\nF-measure: %s\nPrecision: %s\nRecall: %s"
      % (accuracy, falsePositiveRate, truePositiveRate, fMeasure, precision, recall))

Accuracy: 0.15386689435132342
FPR: 0.15386689435132342
TPR: 0.15386689435132342
F-measure: 0.04103596574825185
Precision: 0.023675021177321325
Recall: 0.15386689435132342


In [82]:
###########

In [83]:
# from test set, zip labels with predicted labels and cast to float

act_pred_test_set = testData.map(lambda p: (p.label, LR_Model.predict(p.features))) \
                                    .map(lambda row: (row[0], row[1] * 1.0))

AttributeError: 'DataFrame' object has no attribute 'map'

In [None]:
act_pred_test_set.take(3)

### 3. Lasso and Ridge Regression

In [119]:
lasso = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=1)

In [120]:
model = lasso.fit(trainingData)

Exception ignored in: <bound method JavaModelWrapper.__del__ of <pyspark.mllib.evaluation.MulticlassMetrics object at 0x7fc047cbd978>>
Traceback (most recent call last):
  File "/usr/local/spark/python/pyspark/mllib/common.py", line 142, in __del__
    self._sc._gateway.detach(self._java_model)
AttributeError: 'MulticlassMetrics' object has no attribute '_sc'


In [121]:
predictions= model.transform(testData)

In [122]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_lasso = evaluator.evaluate(predictions)

In [123]:
print("Root Mean Squared Error (RMSE) on test data = %g" % rmse_lasso)

Root Mean Squared Error (RMSE) on test data = 1592.8


In [124]:
trainingSummary = model.summary

In [125]:
accuracy = trainingSummary.accuracy
falsePositiveRate = trainingSummary.weightedFalsePositiveRate
truePositiveRate = trainingSummary.weightedTruePositiveRate
fMeasure = trainingSummary.weightedFMeasure()
precision = trainingSummary.weightedPrecision
recall = trainingSummary.weightedRecall

In [126]:
print("Accuracy: %s\nFPR: %s\nTPR: %s\nF-measure: %s\nPrecision: %s\nRecall: %s"
      % (accuracy, falsePositiveRate, truePositiveRate, fMeasure, precision, recall))

Accuracy: 0.15386689435132342
FPR: 0.15386689435132342
TPR: 0.15386689435132342
F-measure: 0.04103596574825185
Precision: 0.023675021177321325
Recall: 0.15386689435132342


In [None]:
###########

In [None]:
ridge = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0)

In [None]:
model = ridge.fit(trainingData)

In [None]:
predictions= model.transform(testData)

In [None]:
evaluator = RegressionEvaluator(
    labelCol="label", predictionCol="prediction", metricName="rmse")
rmse_ridge = evaluator.evaluate(predictions)

In [None]:
print("Root Mean Squared Error (RMSE) on test data = %g" % rmse_ridge)

# Model Evaluation

- RMSE
- ROC
- R^2

## References
WHO regions wikipedia