# Importing Libraries

In [199]:
import plotly.express as px
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Creating DataFrame

In [200]:
# Men's 100 m record progression: the year each mark was set and the time in seconds.
time = [
    1964, 1968, 1983, 1987, 1988, 1991, 1994, 1996,
    1999, 2002, 2005, 2006, 2007, 2008, 2009,
]
record = [
    10.06, 9.95, 9.93, 9.83, 9.79, 9.86, 9.85, 9.84,
    9.79, 9.78, 9.77, 9.77, 9.74, 9.69, 9.58,
]

# Column-oriented mapping; every row is tagged as a real (non-predicted) men's record.
df = dict(
    time=time,
    record=record,
    gender=['Male' for _ in time],
    predicted=['No' for _ in time],
)

male_record = pd.DataFrame(df)
male_record

Unnamed: 0,time,record,gender,predicted
0,1964,10.06,Male,No
1,1968,9.95,Male,No
2,1983,9.93,Male,No
3,1987,9.83,Male,No
4,1988,9.79,Male,No
5,1991,9.86,Male,No
6,1994,9.85,Male,No
7,1996,9.84,Male,No
8,1999,9.79,Male,No
9,2002,9.78,Male,No


In [201]:
# Women's 100 m record progression: the year each mark was set and the time in seconds.
time = [1968, 1972, 1976, 1977, 1982, 1983, 1984, 1988]
record = [11.07, 11.07, 11.01, 10.88, 10.88, 10.79, 10.76, 10.49]

# Column-oriented mapping; every row is tagged as a real (non-predicted) women's record.
df = dict(
    time=time,
    record=record,
    gender=['Female' for _ in time],
    predicted=['No' for _ in time],
)

female_record = pd.DataFrame(df)
female_record

Unnamed: 0,time,record,gender,predicted
0,1968,11.07,Female,No
1,1972,11.07,Female,No
2,1976,11.01,Female,No
3,1977,10.88,Female,No
4,1982,10.88,Female,No
5,1983,10.79,Female,No
6,1984,10.76,Female,No
7,1988,10.49,Female,No


In [202]:
# Stack the men's and women's tables into one long frame; each table keeps its own row labels.
data = pd.concat(objs=[male_record, female_record])

In [203]:
# Display the combined men's + women's table built in the previous cell.
data

Unnamed: 0,time,record,gender,predicted
0,1964,10.06,Male,No
1,1968,9.95,Male,No
2,1983,9.93,Male,No
3,1987,9.83,Male,No
4,1988,9.79,Male,No
5,1991,9.86,Male,No
6,1994,9.85,Male,No
7,1996,9.84,Male,No
8,1999,9.79,Male,No
9,2002,9.78,Male,No


# Visualizing Dataset

In [210]:
# Scatter of every record, one colour per gender. The x-axis starts two years
# before the earliest record and runs to 2030; the y-axis is fixed at 5-12 s.
fig = (
    px.scatter(data, x="time", y="record", color="gender")
    .update_traces(marker_size=10)
    .update_xaxes(range=[min(data['time']) - 2, 2030], title='time (year)')
    .update_yaxes(range=[5, 12], title='record (seconds)')
    .update_layout(title='100m dash records over time')
)
fig.show()


# Linear Regression Function

In [211]:
from sklearn.linear_model import LinearRegression


def predict_record(gender, year, data):
    """Extrapolate the 100 m record for ``gender`` in ``year`` with a straight-line fit.

    An ordinary least-squares line (``record`` as a function of ``time``) is
    fitted to the rows of ``data`` whose ``gender`` column equals ``gender``.
    Rows already marked ``predicted == "Yes"`` are left out of the fit so that
    earlier extrapolations are never treated as observations.

    The fitted slope and the predicted value are printed.

    Parameters
    ----------
    gender : str
        Value to match in the ``gender`` column (e.g. ``'Male'``).
    year : int or float
        Year to predict the record for.
    data : pandas.DataFrame
        Table with ``time``, ``record`` and ``gender`` columns; a
        ``predicted`` column is optional.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame with columns ``time``, ``record``, ``gender`` and
        ``predicted`` (set to ``"Yes"``). ``None`` is returned, after printing
        ``"Invalid Input"``, when ``data`` lacks a required column, no row
        matches ``gender``, or the values cannot be used as numbers.
    """
    try:
        actual = data[data["gender"] == gender]
        if "predicted" in actual.columns:
            # Fit on real records only, never on rows this function produced earlier.
            actual = actual[actual["predicted"] != "Yes"]
        if actual.empty:
            raise ValueError("no records for gender {!r}".format(gender))

        # Two-dimensional (n, 1) arrays: scikit-learn expects a feature matrix,
        # and a 2-D target keeps coef_ and predict() two-dimensional as well.
        reg = LinearRegression().fit(
            actual[["time"]].to_numpy(dtype=float),
            actual[["record"]].to_numpy(dtype=float),
        )
        slope = reg.coef_[0][0]
        predicted_value = reg.predict([[year]])[0][0]
    except (KeyError, TypeError, ValueError):
        # Only bad-input errors are reported this way; programming errors
        # (e.g. a missing import) now surface instead of being swallowed.
        print("Invalid Input")
        return None

    print("Coeffecients: {}".format(slope))
    print("Predicted Value: {}".format(predicted_value))

    new_row = {
        "time": [year],
        "record": [predicted_value],
        "gender": [gender],
        "predicted": ["Yes"],
    }
    return pd.DataFrame.from_dict(new_row)
        
        



# Predicting Future Values 

In [212]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and ignores a None result from a failed prediction.
data = pd.concat([data, predict_record('Male', 2022, data)])


Coeffecients: -0.007250219106047333
Predicted Value: 9.610877154542797


In [213]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and ignores a None result from a failed prediction.
data = pd.concat([data, predict_record('Male', 2030, data)])


Coeffecients: -0.007250219106047342
Predicted Value: 9.55287540169442


In [214]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and ignores a None result from a failed prediction.
data = pd.concat([data, predict_record('Female', 2022, data)])

Coeffecients: -0.02648325358851676
Predicted Value: 9.723349282296645


In [215]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and ignores a None result from a failed prediction.
data = pd.concat([data, predict_record('Female', 2030, data)])

Coeffecients: -0.02648325358851685
Predicted Value: 9.511483253588523


In [216]:
# Display the table again, now including the rows appended by the prediction cells above.
data

Unnamed: 0,time,record,gender,predicted
0,1964,10.06,Male,No
1,1968,9.95,Male,No
2,1983,9.93,Male,No
3,1987,9.83,Male,No
4,1988,9.79,Male,No
5,1991,9.86,Male,No
6,1994,9.85,Male,No
7,1996,9.84,Male,No
8,1999,9.79,Male,No
9,2002,9.78,Male,No


# Visualizing Predicted Values

In [194]:
# Real vs. predicted records: colour encodes gender, marker symbol encodes the
# "predicted" flag ("Yes" for extrapolated rows, "No" for real records).
fig = px.scatter(data, x="time", y="record", color="gender", symbol='predicted')
fig.update_traces(marker_size=10)
# Extend the x-axis to at least 2032 so the 2030 predictions are not clipped at the edge.
fig.update_xaxes(range=[min(data['time']) - 2, max((2030 + 2), max(data['time']) + 2)], title='time (year)')
fig.update_yaxes(range=[5, 12], title='record (seconds)')
# Title and axis titles match the first figure so the plot can be read on its own.
fig.update_layout(title='100m dash records with predicted values')
fig.show()

In [195]:
# Same points with an ordinary-least-squares trendline drawn per gender.
fig = px.scatter(data, x="time", y="record", color="gender", trendline="ols")
fig.update_traces(marker_size=10)
# Extend the x-axis to at least 2032 so the 2030 predictions are not clipped at the edge.
fig.update_xaxes(range=[min(data['time']) - 2, max((2030 + 2), max(data['time']) + 2)], title='time (year)')
fig.update_yaxes(range=[5, 12], title='record (seconds)')
# Title and axis titles match the first figure so the plot can be read on its own.
fig.update_layout(title='100m dash records with OLS trendline')
fig.show()

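# Limitations of Model

A straight-line fit keeps decreasing without bound, so it cannot be trusted far outside the observed years: extrapolated to 2070 it predicts a women's record (about 8.45 s) that is faster than the men's (about 9.26 s).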

In [196]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and ignores a None result from a failed prediction.
# Each prediction filters by gender, so both can be computed from the same frame.
data = pd.concat([
    data,
    predict_record('Male', 2070, data),
    predict_record('Female', 2070, data),
])

Coeffecients: -0.007250219106047336
Predicted Value: 9.262866637452525
Coeffecients: -0.02648325358851673
Predicted Value: 8.452153110047846


In [197]:
# Real vs. predicted records including the 2070 extrapolations: colour encodes
# gender, marker symbol encodes the "predicted" flag.
fig = px.scatter(data, x="time", y="record", color="gender", symbol='predicted')
fig.update_traces(marker_size=10)
# The upper x-limit follows the latest year in the data (never less than 2032).
fig.update_xaxes(range=[min(data['time']) - 2, max((2030 + 2), max(data['time']) + 2)], title='time (year)')
fig.update_yaxes(range=[5, 12], title='record (seconds)')
# Title and axis titles match the first figure so the plot can be read on its own.
fig.update_layout(title='100m dash records with predictions through 2070')
fig.show()

In [198]:
# Same points, including the 2070 extrapolations, with an ordinary-least-squares
# trendline drawn per gender.
fig = px.scatter(data, x="time", y="record", color="gender", trendline="ols")
fig.update_traces(marker_size=10)
# The upper x-limit follows the latest year in the data (never less than 2032).
fig.update_xaxes(range=[min(data['time']) - 2, max((2030 + 2), max(data['time']) + 2)], title='time (year)')
fig.update_yaxes(range=[5, 12], title='record (seconds)')
# Title and axis titles match the first figure so the plot can be read on its own.
fig.update_layout(title='100m dash records with OLS trendline through 2070')
fig.show()
