# **Import libraries**

In [2]:
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, f1_score


# **Load CSV file**

In [3]:
#load csv 
df = pd.read_csv('cleaningDF.csv')
df

Unnamed: 0,storm_name,year,time,wind_power,air_pressure,storm_type,lat,long,Ocean,Month,Day,beaufort_scale,ocean_code
0,ANA,2021,5:00:00 AM,46.0,1006.0,Subtropical Storm,34.20,-62.20,Atlantic Ocean,5,22,8,0
1,ANA,2021,8:00:00 AM,46.0,1006.0,Subtropical Storm,34.20,-62.50,Atlantic Ocean,5,22,8,0
2,ANA,2021,11:00:00 AM,46.0,1006.0,Subtropical Storm,34.30,-63.00,Atlantic Ocean,5,22,8,0
3,ANA,2021,5:00:00 PM,40.0,1007.0,Subtropical Storm,34.50,-62.40,Atlantic Ocean,5,22,8,0
4,ANA,2021,11:00:00 PM,46.0,1006.0,Subtropical Storm,35.00,-61.40,Atlantic Ocean,5,22,8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
216591,NORA,1951,12:00:00 PM,58.0,985.0,Unknown,20.00,105.50,Western Pacific,9,3,10,5
216592,NORA,1951,3:00:00 PM,54.0,987.0,Unknown,19.92,105.40,Western Pacific,9,3,9,5
216593,NORA,1951,6:00:00 PM,52.0,990.0,Unknown,20.00,105.00,Western Pacific,9,3,9,5
216594,NORA,1951,9:00:00 PM,48.0,991.0,Unknown,20.40,103.95,Western Pacific,9,3,9,5


# **Methods of Linear and Logistic Regression**

- Validation methods.

In [4]:
def coordinateValidation(lat, long):
    return (-90 <= lat <= 90) and (-180 <= long <= 180)

def airPressureValidation(airPressure):
    return airPressure > 0

def dateValidation(year, month, day):
    if year < 0:
        return False
    day_count_for_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):
        day_count_for_month[1] = 29
    return (1 <= month <= 12 and 1 <= day <= day_count_for_month[month])


- get latitude, longitude, air pressure and date from user to predict.

In [5]:
def getParams():
    lat = float(input("Please enter Latitude value: "))
    long = float(input("Please enter Longitude value: "))
    while not coordinateValidation(lat, long):
        print('Your coordinate are out of range please enter again. ')
        lat = float(input("Please enter Latitude value: "))
        long = float(input("Please enter Longitude value: "))
    pressure = float(input("Please enter air pressure value: "))
    while not airPressureValidation(pressure):
        print('Your air pressure are out of range please enter again. ')
        pressure = float(input("Please enter air pressure value: "))
    year, month, day = map(int, input('Enter a date in date format: year month day: ').split(' '))
    while not dateValidation(year, month, day):
        print('Your date are out of range please enter again.')
        year, month, day = map(int, input('Enter a date in date format: year month day. ').split(' '))
    return lat, long, pressure, year, month, day


# **Linear Regression Model**

In [6]:
def linearRegressionModel(df):
    X = df.loc[:, ~df.columns.isin(
        ['storm_name', 'time', 'wind_power', 'storm_type', 'Ocean', 'ocean_code', 'beaufort_scale'])]
    y = df['wind_power']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=45)
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    lat, long, pressure, year, month, day = getParams()
    print(f"You've entered Coordinate ({lat}, {long}), Air Pressure {pressure} and Date {year}/{month}/{day}: ")
    print(f'\nPrediction of wind power is: {float(model.predict([[year, pressure, lat, long, month, day]])[0]):.3f}'
          f' mph')
    print(f'The R^2 score of our model is: {r2_score(y_test, y_pred)}\n')




## **Logistic Regression Model**

- get wind power value from linear regression model.

In [7]:
def getWindFromLinearModel(df, lat, long, pressure, year, month, day):
    X = df.loc[:, ~df.columns.isin(
        ['storm_name', 'time', 'wind_power', 'storm_type', 'Ocean', 'ocean_code', 'beaufort'])]
    y = df['wind_power']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=45)
    model = LinearRegression().fit(X_train, y_train)
    lat = lat
    long = long
    pressure = pressure
    year, month, day = year, month, day
    return round(float(model.predict([[year, pressure, lat, long, month, day]])[0]), 3)

 rearrangedBeaufortColumn - arranged the dataframe to four sub-categories: 
 - scale between 0-4 is Breeze Wind get value 0
 - scale between 5-8 is Tropical Depression get value 1
 - scale between 9-11 is Tropical Storm get value 2
 - scale greater than 12 is Deadly Storm get value 3

In [8]:
def rearrangeBeaufortColumn(df):
    df = df.copy()
    dfBeaufortScaleLessThan5 = df[df['beaufort_scale'] < 5].reset_index(drop=True)
    dfBeaufortScaleBetween5To8 = df[(df['beaufort_scale'] >= 5) & (df['beaufort_scale'] <= 8)].reset_index(drop=True)
    dfBeaufortScaleBetween9To11 = df[(df['beaufort_scale'] >= 9) & (df['beaufort_scale'] <= 11)].reset_index(drop=True)
    dfBeaufortScaleHigherThan12 = df[(df['beaufort_scale'] >= 12)].reset_index(drop=True)
    dfBeaufortScaleLessThan5['beaufort'] = 0
    dfBeaufortScaleBetween5To8['beaufort'] = 1
    dfBeaufortScaleBetween9To11['beaufort'] = 2
    dfBeaufortScaleHigherThan12['beaufort'] = 3

    return pd.concat(
        [dfBeaufortScaleHigherThan12, dfBeaufortScaleBetween9To11, dfBeaufortScaleBetween5To8,
         dfBeaufortScaleLessThan5], ignore_index=True).drop(['beaufort_scale'], axis=1)


In [9]:
def logisticRegressionModel(df):
    # Logistic Regression by beaufort split (windy ,low, med, high)
    df = rearrangeBeaufortColumn(df)
    
    X = df.loc[:, ~df.columns.isin(
        ['storm_name', 'time', 'storm_type', 'Ocean', 'ocean_code', 'beaufort'])]
    y = df['beaufort']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)
    # scaled our X
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    # build the model
    model = LogisticRegression().fit(X_train_scaled, y_train)
    y_pred = model.predict(scaler.transform(X_test))
    lat, long, pressure, year, month, day = getParams()
    wind = getWindFromLinearModel(df, lat, long, pressure, year, month, day)
    cords = scaler.transform([[year, wind, pressure, lat, long, month, day]])
    predicted_values = model.predict_proba(cords)[0]
    windyProb, lowProb, mediumProb, highProb = predicted_values[0] * 100, predicted_values[1] * 100, predicted_values[
        2] * 100, predicted_values[3] * 100
    print(f"You've entered Coordinate ({lat}, {long}), Air Pressure {pressure}, Wind Power {wind} (from linear model)"
          f" and Date {year}/{month}/{day}: ")
    print(f'A windy storm probability: {windyProb:.2f}%'
          f'\nTropical Depression probability: {lowProb:.2f}%\n'
          f'Tropical Storm probability: {mediumProb:.2f}%\nA deadly storm probability: {highProb:.2f}%\n')

    # Evaluate our model
    print(f'The f1 score (average=micro) of our model is: {f1_score(y_test, y_pred, average="micro")}')
    print(f'The Accuracy rate of our model is: {metrics.accuracy_score(y_test, y_pred)}')
    print(f'The Precision rate (average=micro) of our model is: '
          f'{metrics.precision_score(y_test, y_pred, average="micro")}')
    print(f'The Recall rate (average=micro)of our model is: {metrics.recall_score(y_test, y_pred, average="micro")}')
    print('Confusion Matrix of beaufort rate 0-3:')
    

# **Run the program**

In [10]:
def printMenu():
    print('Please enter [0] to Exit\n'
          'please enter [1] to Linear model\n'
          'Please enter [2] to Logistic model')


def chooseModel():
    printMenu()
    userInput = int(input())
    while userInput != 0:
        if userInput == 0:
            break
        elif userInput == 1:
            linearRegressionModel(df)
        elif userInput == 2:
            logisticRegressionModel(df)
        printMenu()
        userInput = int(input('Please enter number in the list.'))



In [12]:
1chooseModel()

Please enter [0] to Exit
please enter [1] to Linear model
Please enter [2] to Logistic model
1
Please enter Latitude value: 63
Please enter Longitude value: 103
Please enter air pressure value: 1003
Enter a date in date format: year month day: 2022 02 23


  "X does not have valid feature names, but"


You've entered Coordinate (63.0, 103.0), Air Pressure 1003.0 and Date 2022/2/23: 

Prediction of wind power is: 39.184 mph
The R^2 score of our model is: 0.8738339725955943

Please enter [0] to Exit
please enter [1] to Linear model
Please enter [2] to Logistic model
Please enter number in the list.2


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Please enter Latitude value: 63
Please enter Longitude value: 103
Please enter air pressure value: 1003
Enter a date in date format: year month day: 2022 02 23


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


You've entered Coordinate (63.0, 103.0), Air Pressure 1003.0, Wind Power 39.268 (from linear model) and Date 2022/2/23: 
A windy storm probability: 1.63%
Tropical Depression probability: 97.11%
Tropical Storm probability: 1.26%
A deadly storm probability: 0.00%

The f1 score (average=micro) of our model is: 0.980870742855384
The Accuracy rate of our model is: 0.980870742855384
The Precision rate (average=micro) of our model is: 0.980870742855384
The Recall rate (average=micro)of our model is: 0.980870742855384
Confusion Matrix of beaufort rate 0-3:
Please enter [0] to Exit
please enter [1] to Linear model
Please enter [2] to Logistic model
Please enter number in the list.0
