### Some regression predictions on ladder score and life expectancy
perchance

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [None]:
# Every indicator column used in this notebook, including both regression
# targets ('Life Ladder' and 'Healthy Life Expectancy At Birth').
metrics = [
    'Life Ladder',
    'Log GDP Per Capita',
    'Social Support',
    'Healthy Life Expectancy At Birth',
    'Freedom To Make Life Choices',
    'Generosity',
    'Perceptions Of Corruption',
    'Positive Affect',
    'Negative Affect',
    'Confidence In National Government',
]

In [None]:
# Feature columns for predicting 'Life Ladder': every indicator except the
# ladder score itself.
metrics_ladder = [
    'Log GDP Per Capita',
    'Social Support',
    'Healthy Life Expectancy At Birth',
    'Freedom To Make Life Choices',
    'Generosity',
    'Perceptions Of Corruption',
    'Positive Affect',
    'Negative Affect',
    'Confidence In National Government',
]

In [None]:
# Feature columns for predicting 'Healthy Life Expectancy At Birth': every
# indicator except the life-expectancy target itself.
# Only `metrics_life` is bound here; a chained `= metrics =` assignment would
# also overwrite the full `metrics` list with this reduced one.
metrics_life = [
    'Life Ladder', 'Log GDP Per Capita', 'Social Support',
    'Freedom To Make Life Choices',
    'Generosity', 'Perceptions Of Corruption', 'Positive Affect',
    'Negative Affect', 'Confidence In National Government'
]

In [None]:
# Load the regional CSV; its first column is used as the DataFrame index.
df = pd.read_csv(
    'datafiles/regional20052022.csv',
    index_col=0,
)

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Keep only complete rows. The explicit copy makes df_dropped an independent
# frame, so the columns added to it later ('split', 'prediction', ...) do not
# trigger pandas' SettingWithCopyWarning and never touch `df`.
df_dropped = df.dropna().copy()

### Basic LinearRegression trained on the metrics

In [None]:
# Target is the ladder score; predictors are all the ladder feature columns.
y = df_dropped['Life Ladder']
X = df_dropped[list(metrics_ladder)]

In [None]:
# Fit on all rows and predict those same rows (in-sample, no hold-out yet).
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

In [None]:
# Scatter of prediction against ground truth, with a dashed identity line
# spanning the observed target range as the "perfect prediction" reference.
truth_min, truth_max = y.min(), y.max()
axis_labels = {'x': 'ground truth', 'y': 'prediction'}

fig = px.scatter(x=y, y=y_pred, labels=axis_labels)
fig.add_shape(
    type="line",
    line={'dash': 'dash'},
    x0=truth_min, y0=truth_min,
    x1=truth_max, y1=truth_max,
)
fig.show()

### With train and test split and extra visualizations

In [None]:
# Partition the row labels 75/25 with a fixed seed so the split is reproducible.
train_idx, test_idx = train_test_split(
    df_dropped.index,
    test_size=0.25,
    random_state=0,
)

# Record each row's membership: default everything to 'train', then overwrite
# the held-out rows. Used later to colour the plots.
# NOTE(review): label-based selection assumes the index values are unique - confirm.
df_dropped['split'] = 'train'
df_dropped.loc[test_idx, 'split'] = 'test'

In [None]:
# Full feature matrix and target (every row), used for prediction and plot bounds.
X = df_dropped[list(metrics_ladder)]
y = df_dropped['Life Ladder']

# Training subset: the same columns restricted to the training row labels.
X_train = X.loc[train_idx]
y_train = y.loc[train_idx]

In [None]:
# Fit on the training rows only, then store predictions for every row so the
# train and test points can be compared in one plot.
model = LinearRegression().fit(X_train, y_train)
df_dropped['prediction'] = model.predict(X)

In [None]:
# Prediction vs. ground truth, coloured by split, with an OLS trendline per
# split and marginal histograms on both axes.
fig = px.scatter(
    df_dropped,
    x='Life Ladder',
    y='prediction',
    color='split',
    trendline='ols',
    marginal_x='histogram',
    marginal_y='histogram',
)

# Normalise the marginal histograms to probabilities so the two splits can be
# compared despite their different sizes.
fig.update_traces(selector=dict(type='histogram'), histnorm='probability')

# Dashed identity line across the observed target range.
fig.add_shape(
    type="line",
    line={'dash': 'dash'},
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max(),
)

fig.show()

### Visualize coefficients

In [None]:
# Label each fitted coefficient by its sign so the bars can be coloured in two groups.
colors = ['Positive' if c > 0 else 'Negative' for c in model.coef_]

# Bar chart of the linear model's coefficients, one bar per feature column.
# `model` was fitted with 'Life Ladder' as the target, so the title names it.
# NOTE(review): colours from color_discrete_sequence are handed out in order of
# first appearance in `colors`, so which sign is red depends on the sign of the
# first coefficient - confirm this is intended.
fig = px.bar(
    x=X.columns, y=model.coef_, color=colors,
    color_discrete_sequence=['red', 'blue'],
    labels=dict(x='Feature', y='Linear coefficient'),
    title='Weight of each feature for predicting Life Ladder'
)
fig.show()

### Predicting life expectancy

In [None]:
# Display the feature list used for the life-expectancy model below.
metrics_life

In [None]:
# Build the life-expectancy feature matrix and target, reusing the existing
# train/test partition (train_idx) rather than drawing a new split.
X_life = df_dropped[[*metrics_life]]
y_life = df_dropped['Healthy Life Expectancy At Birth']
X_train_life = df_dropped.loc[train_idx, [*metrics_life]]
y_train_life = df_dropped.loc[train_idx, 'Healthy Life Expectancy At Birth']

# Support-vector regression; LinearRegression() can be swapped in here as a
# baseline for comparison.
# NOTE(review): the features are passed to SVR unscaled; kernel SVMs are
# usually sensitive to feature scale - consider standardising first.
model_life = SVR(C=1.)
model_life.fit(X_train_life, y_train_life)

# Predict every row (train and test) so both splits appear in the plot.
df_dropped['prediction_life'] = model_life.predict(X_life)

fig = px.scatter(
    df_dropped, x='Healthy Life Expectancy At Birth', y='prediction_life',
    marginal_x='histogram', marginal_y='histogram',
    color='split', trendline='ols'
)
fig.update_traces(histnorm='probability', selector={'type':'histogram'})

# Dashed identity line over the life-expectancy range. It must use y_life:
# `y` holds the Life Ladder scores, which are on a different scale.
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y_life.min(), y0=y_life.min(),
    x1=y_life.max(), y1=y_life.max()
)

fig.show()