In [1]:
import json
import sklearn
import requests
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, f1_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, normalize

## Weather

In [2]:
# weatherapi, API for current and forecast weather data
def getWeatherInfoAPI(lat, long, days):
    """Query the weatherapi.com forecast endpoint for a coordinate pair.

    Parameters
    ----------
    lat, long : float or str
        Coordinates; sent to the API as a single ``"lat,long"`` query value.
    days : int or str
        Value forwarded unchanged as the ``days`` query parameter.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when the request times out.
    """
    import os  # function-scope import so this cell does not depend on the import cell changing

    # Prefer a key supplied through the environment. The literal is kept only
    # as a fallback so existing runs keep working.
    # NOTE(review): a committed API key should be rotated and removed.
    key = os.environ.get("WEATHERAPI_KEY", "d1213f5d16694c20ac6144749212012")

    # HTTPS so the key in the query string is not sent in clear text.
    url = "https://api.weatherapi.com/v1/forecast.json"
    location = str(lat) + ',' + str(long)

    querystring = {"q": location,
                   "key": key,
                   "lang": "en",
                   "days": days}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, params=querystring, timeout=10)
    # Fail here with a clear HTTP error instead of a KeyError further downstream.
    response.raise_for_status()
    return response.json()

In [3]:
def getWeatherInfo(lat, long, days):
    """Summarise current and forecast weather into the DataFrame's feature names.

    For each feature the current observation and the matching value of every
    returned forecast day are averaged with a plain arithmetic mean. (The
    previous implementation repeatedly halved ``running + day``, which gave the
    last forecast day half of the total weight.)

    Parameters
    ----------
    lat, long : float or str
        Coordinates forwarded to ``getWeatherInfoAPI``.
    days : int or str
        Number of forecast days requested from ``getWeatherInfoAPI``.

    Returns
    -------
    dict
        Keys ``"MaxTemp"``, ``"WindSpeed"``, ``"Precipitation"`` and
        ``"Humidity"`` mapped to the averaged values.

    Raises
    ------
    KeyError
        If the response has no ``"current"`` section or a field is missing.
    """
    response = getWeatherInfoAPI(lat, long, days)

    # (field under "current", field under each forecast "day", DataFrame column)
    field_map = [
        ("temp_c", "maxtemp_c", "MaxTemp"),
        ("wind_kph", "maxwind_kph", "WindSpeed"),
        ("precip_mm", "totalprecip_mm", "Precipitation"),
        ("humidity", "avghumidity", "Humidity"),
    ]

    current = response["current"]
    # Tolerate a response without forecast days: only the current values are used then.
    forecast_days = response.get("forecast", {}).get("forecastday", [])

    weather = {}
    for current_field, forecast_field, column in field_map:
        samples = [current[current_field]]
        samples.extend(entry["day"][forecast_field] for entry in forecast_days)
        # samples always holds the current reading, so the division is safe.
        weather[column] = sum(samples) / len(samples)

    return weather

## Machine Learning

In [4]:
def LinearRegLatLong(df, lat, long):
    """Predict the ``Acres`` value for a location from a freshly fitted linear model.

    The weather summary for the coordinates is fetched with a 7-day forecast,
    a linear regression is fitted on 70% of ``df`` (fixed random state), and
    the prediction for the given location and weather is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Lat``, ``Long``, ``MaxTemp``, ``Humidity``,
        ``WindSpeed``, ``Precipitation`` and ``Acres``.
    lat, long : float
        Coordinates to predict for.

    Returns
    -------
    The first (only) element of the model's prediction array.
    """
    feature_columns = ["Lat", "Long", "MaxTemp", "Humidity", "WindSpeed", "Precipitation"]
    weather = getWeatherInfo(lat, long, "7")

    # Only the training part of the split is used; the held-out part is discarded.
    X_train, _, y_train, _ = train_test_split(
        df[feature_columns], df["Acres"], test_size=0.3, random_state=42
    )

    regressor = sklearn.linear_model.LinearRegression()
    regressor.fit(X_train.values, y_train.values)

    # Query row follows the same column order the model was trained on.
    query_row = [lat, long] + [weather[name] for name in feature_columns[2:]]
    return regressor.predict([query_row])[0]

In [5]:
def LogisticRegLatLong(df, lat, long, duration):
    """Estimate fire-cause probabilities for a location and fire duration.

    A logistic regression is fitted on 70% of ``df`` (fixed random state) and
    queried with the given coordinates, duration and the weather summary
    returned by ``getWeatherInfo``.

    Unlike the previous version, ``df`` is not modified: the ``FireCause``
    labels are encoded on a derived Series instead of being written back into
    the caller's DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Lat``, ``Long``, ``Duration``, ``MaxTemp``,
        ``Humidity``, ``WindSpeed``, ``Precipitation`` and ``FireCause``.
    lat, long : float
        Coordinates of the fire.
    duration : float
        Value for the ``Duration`` feature.

    Returns
    -------
    numpy.ndarray
        Result of ``predict_proba`` for the single query row (one row, one
        column per class seen during training).
    """
    # NOTE(review): days="0" presumably means "current conditions only" — confirm
    # that the weather API accepts 0 for this parameter.
    weather = getWeatherInfo(lat, long, "0")

    feature_columns = ["Lat", "Long", "Duration", "MaxTemp", "Humidity", "WindSpeed", "Precipitation"]
    cause_codes = {"Human": 0, "Natural": 1, "Unknown": 2}

    X = df[feature_columns]
    # Encode on a new Series so the caller's DataFrame keeps its original labels.
    # The explicit cast keeps the target integer-typed regardless of how
    # `replace` infers the result dtype.
    y = df["FireCause"].replace(cause_codes).astype(int)

    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42)
    model = sklearn.linear_model.LogisticRegression(max_iter=5000).fit(X_train.values, y_train.values)

    query_row = [lat, long, duration] + [weather[name] for name in feature_columns[3:]]
    return model.predict_proba([query_row])

## Evaluation of models

In [53]:
def evaluateLinear(df):
    """Return the R² score of a linear regression on a held-out part of ``df``.

    The model is fitted on 89% of the rows (``test_size=0.11``,
    ``random_state=50``) using ``Lat``, ``Long``, ``MaxTemp``, ``Humidity``,
    ``WindSpeed`` and ``Precipitation`` to predict ``Acres``; the score is
    computed on the remaining rows.
    """
    predictors = df[["Lat", "Long", "MaxTemp", "Humidity", "WindSpeed", "Precipitation"]]
    target = df["Acres"]

    split = train_test_split(predictors, target, test_size=0.11, random_state=50)
    X_train, X_test, y_train, y_test = split

    regressor = sklearn.linear_model.LinearRegression()
    regressor.fit(X_train.values, y_train.values)

    return r2_score(y_test, regressor.predict(X_test.values))

In [52]:
def evaluateLogistic(df):
    """Return the micro-averaged F1 score of the fire-cause classifier.

    A logistic regression is fitted on 72% of ``df`` (``test_size=0.28``,
    ``random_state=69``) and scored on the remaining rows.

    Unlike the previous version, ``df`` is not modified: the ``FireCause``
    labels are encoded on a derived Series instead of being written back into
    the caller's DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Lat``, ``Long``, ``Duration``, ``MaxTemp``,
        ``Humidity``, ``WindSpeed``, ``Precipitation`` and ``FireCause``.

    Returns
    -------
    float
        ``f1_score(..., average='micro')`` on the held-out rows.
    """
    cause_codes = {"Human": 0, "Natural": 1, "Unknown": 2}

    X = df[["Lat", "Long", "Duration", "MaxTemp", "Humidity", "WindSpeed", "Precipitation"]]
    # Encode on a new Series so the caller's DataFrame keeps its original labels.
    # The explicit cast keeps the target integer-typed regardless of how
    # `replace` infers the result dtype.
    y = df["FireCause"].replace(cause_codes).astype(int)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.28, random_state=69)
    model = sklearn.linear_model.LogisticRegression(max_iter=5000).fit(X_train.values, y_train.values)
    y_pred = model.predict(X_test.values)

    return f1_score(y_test, y_pred, average='micro')

## Program

In [6]:
def runProgram():
    """Interactive entry point: ask for a model and its inputs, then print a prediction.

    Reads ``Cleaned_Wildland_Fires.csv``, prompts for the model choice and the
    coordinates (plus the fire duration for the logistic model), and prints
    either the rounded predicted acreage or the per-cause probabilities.

    Raises
    ------
    ValueError
        If the model choice is not ``"1"``/``"2"``, a number cannot be parsed,
        the coordinates are out of range, or the duration is negative.
    """
    df = pd.read_csv("Cleaned_Wildland_Fires.csv")

    # Menu
    for menu_line in (
        "[1] Linear Regression: Predict size of possible fire this week with coordinates",
        "[2] Logistic Regression: Predict cause of past fire with coordinates and its duration",
    ):
        print(menu_line)
    modelChoice = input("Choice: ")

    if modelChoice not in ("1", "2"):
        raise ValueError('Model Choice should be [1] or [2]')

    # Coordinates
    lat = float(input("Lat: "))
    long = float(input("Long: "))

    coordinates_valid = -90 <= lat <= 90 and -180 <= long <= 180
    if not coordinates_valid:
        raise ValueError('Wrong coordinates')

    # Linear model: predicted size of the fire
    if modelChoice == "1":
        predicted_acres = LinearRegLatLong(df, lat, long)
        print("Acres:", round(predicted_acres))
        return

    # Logistic model: probability of each cause (choice was validated as "2")
    duration = float(input("Duration of the fire in days: "))
    if duration < 0:
        raise ValueError('invalid days')

    probabilities = LogisticRegLatLong(df, lat, long, duration)[0]
    print("Cause Probability:")
    cause_names = ["Human", "Natural", "Unknown"]
    for index in range(len(cause_names)):
        print("\t{0}: {1:.0%}".format(cause_names[index], probabilities[index]))

In [48]:
# Start the interactive session; this cell blocks on keyboard input prompts.
runProgram()