In [None]:
def MultipleRegressionPredictor(df, opt=False, it=0):
    """Fit a multiple linear regression by least squares and append predictions.

    The last column of ``df`` is treated as the dependent variable and every
    other column as a predictor. The function:

    1. Solves the normal equations (expressed through the sample covariance
       matrix) for the slope coefficients and derives the intercept from the
       column means.
    2. Prints the intercept, the coefficients and R².
    3. Runs an overall F-test with ``k`` and ``N - k - 1`` degrees of freedom,
       where ``k`` is the number of predictors. If the model is not
       significant at the 5% level it prints a message and returns ``None``.
    4. Runs a two-sided t-test (``N - k - 1`` degrees of freedom) on the
       intercept and on each coefficient, printing the outcome.
    5. If ``opt`` is true and some predictors are not significant, drops them
       and refits recursively. If *every* predictor is non-significant there
       is nothing left to refit, so the current model is kept.

    Requires ``pandas`` (as ``pd``), ``numpy`` (as ``np``) and
    ``scipy.stats`` (as ``stats``) to be available in the enclosing scope.

    Args:
        df (pandas.DataFrame): numeric data; dependent variable in the last
            column. The input frame is not modified.
        opt (bool): when True, iteratively remove predictors whose t-test
            p-value exceeds 0.05 and refit.
        it (int): pruning-iteration counter used by the recursive calls and
            shown in the printed "Iteration number" banner. Callers normally
            leave it at the default of 0. It is not an upper bound.

    Returns:
        pandas.DataFrame or None: a copy of the (possibly pruned) frame with
        an extra ``predicted_values`` column, or ``None`` when the overall
        F-test is not significant.

    Raises:
        TypeError: if an argument has the wrong type.
        ValueError: if ``df`` has no predictor column, or too few rows to
            leave at least one residual degree of freedom.
        numpy.linalg.LinAlgError: if the predictors are perfectly collinear.
    """
    # Check input types
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame.")
    if not isinstance(opt, bool):
        raise TypeError("opt must be a boolean.")
    if not isinstance(it, int):
        raise TypeError("it must be an integer.")

    alpha = 0.05  # significance level used by every test below
    separator = '_________________________________________________________________'

    # The frame holds k predictors plus one dependent column.
    n_obs, n_cols = df.shape
    n_pred = n_cols - 1
    if n_pred < 1:
        raise ValueError(
            "df must contain at least one predictor column followed by the "
            "dependent variable column."
        )
    # Residual degrees of freedom: observations minus (k slopes + intercept).
    dof_resid = n_obs - n_pred - 1
    if dof_resid < 1:
        raise ValueError(
            "df needs more rows than columns to estimate the model "
            f"(got {n_obs} rows and {n_cols} columns)."
        )

    # Splitting predictor and dependent variables
    data = df.to_numpy(dtype=float)
    x_v = data[:, :n_pred]
    y_v = data[:, n_pred]
    predictor_names = df.columns[:n_pred]

    # Normal equations written with the sample covariance matrix:
    # Cov(X, X) @ beta = Cov(X, y)
    cov = np.atleast_2d(np.cov(data, rowvar=False, ddof=1))
    coef = np.linalg.solve(cov[:n_pred, :n_pred], cov[:n_pred, n_pred])

    # The fitted hyperplane passes through the point of means.
    intercept = y_v.mean() - x_v.mean(axis=0) @ coef

    # Printing Intercept and Coefficients
    print(f'Intercept: {round(intercept,4)}\n')
    for idx, (name, beta) in enumerate(zip(predictor_names, coef)):
        print(f'Coeficient 𝛽{idx} ({name}): {round(beta, 4)}\n')

    # Fitted values and sums of squares
    fitted_values = intercept + x_v @ coef
    residuals = y_v - fitted_values
    ss_model = ((fitted_values - y_v.mean()) ** 2).sum()
    ss_resid = (residuals ** 2).sum()
    r_squared = ss_model / (ss_model + ss_resid)
    print(f'R²: {round(r_squared,4)}\n{separator}\n')

    # F-Test: k numerator and N - k - 1 denominator degrees of freedom.
    f_stat = (ss_model / n_pred) / (ss_resid / dof_resid)
    # sf() is the upper tail; more accurate than 1 - cdf() for tiny p-values.
    pvalf = stats.f.sf(f_stat, n_pred, dof_resid)
    print(f'F-statistic: {round(f_stat, 4)}, p-value: {round(pvalf, 4)}, dof = {dof_resid}.')

    # Checking overall significance
    if pvalf < alpha:
        print(f'At least one 𝛽 is significant.\n{separator}\n')
    else:
        print(f'There are no significant 𝛽.\n{separator}\n')
        return None

    # T-Test: standard errors from sigma² * (X'X)^-1, intercept column first.
    design = np.column_stack((np.ones(n_obs), x_v))
    sigma_squared_hat = ss_resid / dof_resid
    std_err = np.sqrt(np.diag(np.linalg.inv(design.T @ design)) * sigma_squared_hat)

    # Intercept T statistic and two-sided p-value
    estat_t_alpha = intercept / std_err[0]
    pvalt = stats.t.cdf(-abs(estat_t_alpha), dof_resid) * 2
    print(f'T statistic (α): {round(estat_t_alpha,4)}, p-value: {round(pvalt, 4)}.')
    if pvalt < alpha:
        print(f'The intercept is statisticly different than 0.\n')
    else:
        print(f'The intercept is statisticly equal 0.\n')

    # Coefficients T statistics and two-sided p-values
    list_pval = []
    for idx, (name, beta) in enumerate(zip(predictor_names, coef)):
        estat_t_beta = beta / std_err[idx + 1]
        pvalt = stats.t.cdf(-abs(estat_t_beta), dof_resid) * 2

        print(f'T statistic (𝛽{idx}): {round(estat_t_beta,4)}, p-value: {round(pvalt, 4)}.')
        if pvalt < alpha:
            print(f'Coeficient 𝛽{idx} ({name}) is statisticly different than 0.\n')
        else:
            print(f'Coeficient 𝛽{idx} ({name}) is statisticly equal 0.\n')
        list_pval.append(pvalt)

    # With opt enabled, drop every predictor whose p-value is above alpha and
    # refit on what is left.
    not_significant = np.array(list_pval) > alpha
    if opt and not_significant.any():
        if not_significant.all():
            # Dropping everything would leave no model to fit; keep this one.
            print('No individually significant predictor remains; keeping the current model.\n')
        else:
            it += 1
            print(f'Excluded column(s): {predictor_names[not_significant]}')
            # Select by position (significant predictors + dependent column)
            # so duplicated column labels cannot remove extra columns.
            keep = np.flatnonzero(np.append(~not_significant, True))
            reduced = df.iloc[:, keep]
            print(f'\n{separator}\nIteration number {it}\n\n')
            return MultipleRegressionPredictor(reduced, opt=True, it=it)

    # Returning a copy of the DataFrame with the predicted values appended
    df_final = df.copy()
    df_final['predicted_values'] = fitted_values
    return df_final