In [200]:
import pandas as pd
import json
import import_ipynb
import model
import pickle

In [201]:
# Column names accepted throughout this notebook. predictValue() requires its
# modelName to be one of these, and requires every other name in the list to be
# present as a key of the prediction input.
allowedColumns = [
    "KlasifikacijaNesrece", "UpravnaEnotaStoritve", "UraPN",
    "VNaselju", "Lokacija", "VrstaCesteNaselja",
    "SifraCesteNaselja", "TekstCesteNaselja", "SifraOdsekaUlice",
    "TekstOdsekaUlice", "StacionazaDogodka", "OpisKraja",
    "VzrokNesrece", "TipNesrece", "VremenskeOkoliscine",
    "StanjePrometa", "StanjeVozisca", "VrstaVozisca",
    "GeoKoordinataX", "GeoKoordinataY", "Povzrocitelj",
    "Starost", "Spol", "UEStalnegaPrebivalisca",
    "Drzavljanstvo", "PoskodbaUdelezenca", "VrstaUdelezenca",
    "UporabaVarnostnegaPasu", "VozniskiStazVLetih", "VrednostAlkotesta",
    "VrednostStrokovnegaPregleda",
]

# Columns that cannot be used as a prediction target: predictValue() rejects
# them because no model is available for them (too compute heavy).
computeHeavyModels = [
    "SifraCesteNaselja",
    "TekstCesteNaselja",
    "SifraOdsekaUlice",
    "TekstOdsekaUlice",
    "StacionazaDogodka",
]

In [202]:
def saveMostFrequentValues():
    """Store the most frequent value of every allowed column in ``mostFrequent.json``.

    The data is obtained from ``model.preprocessData()``. The resulting JSON
    object maps each column name to its most frequent value, converted to a
    string.
    """
    frame = model.preprocessData()

    # value_counts().idxmax() yields the value that occurs most often
    # (the index label of the largest count), not the count itself.
    modeByColumn = {
        name: str(frame[name].value_counts().idxmax())
        for name in allowedColumns
    }

    with open("mostFrequent.json", "w") as handle:
        json.dump(modeByColumn, handle, indent=4)


In [203]:
def loadMostFrequentValues():
    """Return the parsed contents of ``mostFrequent.json`` from the working directory."""
    with open("mostFrequent.json", 'r') as handle:
        return json.load(handle)

In [204]:
def generatePredictionDataFrame(predictionData, columns):
    """Turn user-supplied values into the single-row feature frame for a model.

    Each name in ``columns`` is looked up in ``predictionData`` and encoded:

    * the missing marker (``"-1"`` or the number ``-1``) is replaced by the
      column's entry from ``mostFrequent.json``;
    * a value ``float()`` can parse is scaled per column (floor-divided or
      multiplied, see the tables below) and truncated to ``int``;
    * any other value is translated through ``data_To_Prediction/<column>.json``.

    Args:
        predictionData: mapping from column name to the raw value.
        columns: iterable of column names to encode, in output order.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and one column per entry
        of ``columns``.

    Raises:
        KeyError: a column is absent from ``predictionData`` or from
            ``mostFrequent.json``, or a non-numeric value has no entry in the
            column's mapping file.
        OSError: a required JSON file cannot be opened.
        ValueError: a stored most frequent value is not numeric.
    """
    # Numeric columns are reduced to bins by floor division ...
    floorDivisors = {
        "UraPN": 1,
        "GeoKoordinataX": 10000,
        "GeoKoordinataY": 10000,
        "Starost": 10,
        "VozniskiStazVLetih": 10,
    }
    # ... or scaled up before truncation.
    multipliers = {
        "VrednostAlkotesta": 10,
        "VrednostStrokovnegaPregleda": 10,
    }

    mostFrequent = loadMostFrequentValues()
    jsonPath = "data_To_Prediction/"
    generatedPrediction = dict()

    for column in columns:
        rawValue = predictionData[column]

        # Callers are told to pass -1 for unknown values; accept both the
        # string and the numeric spelling of that marker.
        if rawValue == "-1" or rawValue == -1:
            # Go through float() so a stored "3.0" is accepted as well as "3".
            generatedPrediction[column] = int(float(mostFrequent[column]))
            continue

        try:
            val = float(rawValue)
            if column in floorDivisors:
                val //= floorDivisors[column]
            elif column in multipliers:
                val *= multipliers[column]
            encoded = int(val)
        except (TypeError, ValueError, OverflowError):
            # Not a usable number (text label, None, nan/inf): translate it
            # through the column's label mapping instead.
            with open(jsonPath + column + ".json", 'r') as f:
                mapping = json.load(f)
            encoded = mapping[rawValue]

        generatedPrediction[column] = encoded

    return pd.DataFrame([generatedPrediction])

In [205]:
def convertPrediction(prediction, column):
    jsonPath = "prediction_To_Data/"
    match column:
        case "UraPN":
            fromHour = str(prediction) + ".00"
            toHour = "0.00" if prediction == 23 else str(prediction + 1) + ".00"
            prediction = fromHour + " - " + toHour
            return prediction
        case "GeoKoordinataX":
            prediction = prediction * 10000
            prediction = str(prediction) + " - " + str(prediction + 10000)
            return prediction
        case "GeoKoordinataY":
            prediction = prediction * 10000
            prediction = str(prediction) + " - " + str(prediction + 10000)
            return prediction
        case "Starost":
            prediction = prediction * 10
            prediction = str(prediction) + " - " + str(prediction + 10)
            return prediction
        case "VozniskiStazVLetih":
            prediction = prediction * 10
            prediction = str(prediction) + " - " + str(prediction + 10)
            return prediction
        case "VrednostAlkotesta":
            prediction /= 10 
            prediction = str(prediction) + " - " + str(prediction + 0.1)
            return prediction
        case "VrednostStrokovnegaPregleda":
            prediction /= 10 
            prediction = str(prediction) + " - " + str(prediction + 0.1)
            return prediction
        case _:
            with open(jsonPath + column + ".json", 'r') as f:
                mapping = json.load(f)
            return mapping[str(prediction)]
            


In [206]:
def predictValue(modelName, predictionData):
    """Predict the value of column ``modelName`` from the other columns.

    Args:
        modelName: column to predict; must be in ``allowedColumns`` and not in
            ``computeHeavyModels``.
        predictionData: dict holding a value for every other allowed column.

    Returns:
        The prediction converted by ``convertPrediction``. ``None`` is returned
        for a compute-heavy model, which can only happen when assertions are
        disabled.

    Raises:
        AssertionError: the arguments fail any of the checks below.
    """
    assert isinstance(predictionData, dict), "predictionData must be a dictionary"
    assert modelName in allowedColumns, "Invalid model name. Model name must be a valid column name. Column names can be found in predictions.ipynb"
    assert modelName not in computeHeavyModels, "Don't have the model due to it being to compute heavy avoid models specified in predictions.ipynb in computeHeavyModels variable"

    # Every allowed column except the target is a required feature.
    featureColumns = list(allowedColumns)
    featureColumns.remove(modelName)
    for required in featureColumns:
        assert required in predictionData , f"{required} is missing as a key in args. If a value is not provided replace it with -1"

    featureFrame = generatePredictionDataFrame(predictionData, featureColumns)

    # Unreachable while the assertion above is active; acts as the guard when
    # Python runs with assertions stripped.
    if modelName in computeHeavyModels:
        return

    picklePath = f"models_Random_Forest/{modelName}_model.pkl"
    # NOTE: pickle.load can execute arbitrary code - only load model files
    # from a trusted source.
    with open(picklePath, "rb") as handle:
        estimator = pickle.load(handle)

    rawPrediction = estimator.predict(featureFrame)[0]
    return convertPrediction(rawPrediction, modelName)

      

In [None]:
def examplePredict():
    """Print one example prediction for every model that is available.

    For each allowed column that is not compute heavy, one random row is drawn
    from ``model.preprocessData()``, the target column is left out, the other
    values are passed to ``predictValue`` as integers, and the result is
    printed as ``"<column>: <prediction>"``.

    NOTE(review): the sampled rows are already preprocessed. If
    ``preprocessData`` has binned the numeric columns, they are scaled a second
    time inside ``generatePredictionDataFrame`` - confirm this is intended.
    """
    cleanedData = model.preprocessData()

    for modelName in allowedColumns:
        # No model exists for these targets, so do not sample a row for them.
        if modelName in computeHeavyModels:
            continue

        sampleRow = cleanedData.sample(n=1)

        # Features are all allowed columns except the one being predicted.
        predictionInput = {
            column: int(sampleRow[column].iloc[0])
            for column in allowedColumns
            if column != modelName
        }

        prediction = predictValue(modelName, predictionInput)
        # f-string formatting also copes with a prediction that is not a str.
        print(f"{modelName}: {prediction}")

In [None]:
#examplePredict()
#saveMostFrequentValues()


KlasifikacijaNesrece: Z MATERIALNO ŠKODO
UpravnaEnotaStoritve: LJUBLJANA
UraPN: 13.00 - 14.00
VNaselju: DA
Lokacija: NASELJE
VrstaCesteNaselja: NASELJE BREZ ULIČNEGA SISTEMA
OpisKraja: CESTA
VzrokNesrece: NEUPOŠTEVANJE PRAVIL O PREDNOSTI
TipNesrece: TRČENJE V STOJEČE / PARKIRANO VOZILO
VremenskeOkoliscine: JASNO
StanjePrometa: TEKOČ (NORMALEN)
StanjeVozisca: SUHO
VrstaVozisca: HRAPAV  ASFALT / BETON
GeoKoordinataX: 100000 - 110000
GeoKoordinataY: 460000 - 470000
Povzrocitelj: UDELEŽENEC
Starost: 20 - 30
Spol: MOŠKI
UEStalnegaPrebivalisca: LJUBLJANA
Drzavljanstvo: SLOVENIJA
PoskodbaUdelezenca: BREZ POŠKODBE
VrstaUdelezenca: VOZNIK OSEBNEGA AVTOMOBILA
UporabaVarnostnegaPasu: DA
VozniskiStazVLetih: 0 - 10
VrednostAlkotesta: 0.0 - 0.1
VrednostStrokovnegaPregleda: 0.0 - 0.1
