This is for when there are many groups and we want to study the trend in each of them separately.
For every target variable there will be created a ``.csv`` file containing information about linear regression in each group.

- ``variable`` - independent variable over which the linear regression is performed i.e. years
- ``target`` - list of variables to explain
- ``groupby`` - column of data containg group lables, i.e. countries
  

In [5]:
data_path = "grid_charts_data.csv"

variable = "Year"
target = ["Median_SCR_Median", "Average_SCR_Avg"]
groupby = "ASJC_2d"

create_CSV = False

In [6]:
import pandas as pd

df = pd.read_csv(data_path)

from scipy.stats import linregress
from sklearn.metrics import mean_squared_error

def lin_reg(X, y):
    reg = linregress(X,y)

    mse = mean_squared_error(y, reg.slope * y + reg.intercept)

    default = list(reg)
    custom = [reg.intercept_stderr, mse,reg.rvalue**2]

    return default + custom

In [7]:
default_statnames = ["slope", "intercept", "r_value", "p_value", "std_err"]
custom_statnames = ["intercept_stderr", "MSE", "R^2"]
statnames = default_statnames + custom_statnames

result_dict = {}
for targeted in target:
    result_dict[targeted] = {}
    for group in df[groupby].unique():
        data = df.loc[df[groupby] == group, [variable, targeted]]
        X = data[variable]
        y = data[targeted]
        reg_list = lin_reg(X,y)
        result = dict(zip(statnames, reg_list))
        result_dict[targeted][group] = result

result = {}
for targeted in target:
    data = pd.DataFrame(result_dict[targeted]).transpose().sort_index()
    data.index.name = groupby
    if create_CSV:
        data.to_csv(f"reg_{groupby}_{targeted}.csv", index=True)
    result[targeted] = data

In [8]:
result[target[0]]

Unnamed: 0_level_0,slope,intercept,r_value,p_value,std_err,intercept_stderr,MSE,R^2
ASJC_2d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
11,0.000242,-0.40981,0.636407,9.025712e-05,5.4e-05,0.107421,0.235636,0.405014
12,-0.000228,0.491411,-0.711711,4.942479e-06,4.1e-05,0.082388,0.209022,0.506532
13,-0.000502,1.117633,-0.707989,5.825735e-06,9.1e-05,0.183198,1.011792,0.501249
14,-0.000516,1.074868,-0.917268,1.58512e-13,4.1e-05,0.082105,1.072724,0.841381
15,-0.001067,2.268628,-0.454083,0.00903883,0.000382,0.766602,4.579271,0.206192
16,-0.000412,0.952566,-0.524561,0.002055764,0.000122,0.244929,0.683162,0.275164
17,-0.000348,0.785967,-0.576207,0.0005579181,9e-05,0.180825,0.487538,0.332014
18,-0.001269,2.63606,-0.725706,2.601676e-06,0.00022,0.440542,6.477458,0.52665
19,0.000209,-0.338121,0.504293,0.00325004,6.5e-05,0.130957,0.175459,0.254311
20,-0.000561,1.180927,-0.942667,7.687005e-16,3.6e-05,0.072772,1.26749,0.888621
