# Import packages

In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import sys
import os

def find_project_root(marker="multiff_analysis"):
    """Return the nearest directory, at or above the CWD, that contains `marker`.

    The search starts from the resolved current working directory (not from
    ``__file__``) and walks upward through each ancestor in turn.

    Raises:
        FileNotFoundError: if no directory on that chain contains `marker`.
    """
    start_dir = Path(os.getcwd()).resolve()
    candidates = (start_dir, *start_dir.parents)
    # First hit wins, so the closest enclosing directory is returned.
    found = next(
        (folder for folder in candidates if (folder / marker).exists()),
        None,
    )
    if found is None:
        raise FileNotFoundError(f"Could not find project root with marker '{marker}'")
    return found

# Locate the project root once; the PGAM paths below are derived from it.
project_root = find_project_root()

# PGAM source directories under the project's external/ folder
pgam_src = project_root.joinpath("multiff_analysis", "external", "pgam", "src")
pgam_src_pg = pgam_src.joinpath("PGAM")

# Append each directory to sys.path unless it is already listed there
for path in (pgam_src, pgam_src_pg):
    if str(path) in sys.path:
        continue
    sys.path.append(str(path))


from pathlib import Path
import os, sys
# Make the 'Multifirefly-Project' folder the working directory (the data path
# used later in this notebook is relative) and put the project's `methods`
# folder first on sys.path so the project imports below resolve.
for p in [Path.cwd()] + list(Path.cwd().parents):
    if p.name == 'Multifirefly-Project':
        os.chdir(p)
        sys.path.insert(0, str(p / 'multiff_analysis/multiff_code/methods'))
        break
else:
    # Reached only when the loop ended without `break`, i.e. no matching
    # folder exists at or above the CWD. Say so explicitly instead of letting
    # the later imports / relative paths fail with an unrelated-looking error.
    import warnings
    warnings.warn(
        "Could not find a 'Multifirefly-Project' folder at or above "
        f"{Path.cwd()}; working directory and sys.path were left unchanged."
    )


from data_wrangling import specific_utils, process_monkey_information, general_utils
from pattern_discovery import pattern_by_trials, pattern_by_trials, cluster_analysis, organize_patterns_and_features
from visualization.matplotlib_tools import plot_behaviors_utils
from neural_data_analysis.neural_analysis_tools.get_neural_data import neural_data_processing
from neural_data_analysis.neural_analysis_tools.visualize_neural_data import plot_neural_data, plot_modeling_result
from neural_data_analysis.neural_analysis_tools.model_neural_data import transform_vars, neural_data_modeling, drop_high_corr_vars, drop_high_vif_vars
from neural_data_analysis.topic_based_neural_analysis.neural_vs_behavioral import prep_monkey_data, prep_target_data, neural_vs_behavioral_class
from neural_data_analysis.topic_based_neural_analysis.planning_and_neural import planning_and_neural_class, pn_utils, pn_helper_class
from neural_data_analysis.neural_analysis_tools.cca_methods import cca_class
from neural_data_analysis.neural_analysis_tools.cca_methods import cca_class, cca_utils, cca_cv_utils
from neural_data_analysis.neural_analysis_tools.cca_methods.cca_plotting import cca_plotting, cca_plot_lag_vs_no_lag, cca_plot_cv
from machine_learning.ml_methods import regression_utils, ml_methods_utils, regz_regression_utils, ml_methods_class, classification_utils, ml_plotting_utils
from neural_data_analysis.neural_analysis_tools.pgam_tools import pgam_class

import sys
import math
import gc
import subprocess
from pathlib import Path
from importlib import reload

# Third-party imports
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
from scipy import linalg, interpolate
from scipy.signal import fftconvolve
from scipy.io import loadmat
from scipy import sparse
from numpy import pi

# Machine Learning imports
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.multivariate.cancorr import CanCorr

# Neuroscience specific imports
import neo
import rcca

# Notebook-wide runtime and display configuration.

# NOTE(review): presumably set to tolerate duplicate OpenMP runtimes being
# loaded by different packages — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Reset matplotlib to its defaults BEFORE applying custom settings:
# rcParams.update(rcParamsDefault) overwrites every key, including
# 'animation.html', so anything set ahead of it would be discarded.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)

# Render animations as JS/HTML. Only this value is kept: it is the setting
# that took precedence over the other animation.html assignment.
rc('animation', html='jshtml')
# Embed limit is expressed in MB; this value effectively removes the cap.
matplotlib.rcParams['animation.embed_limit'] = 2**128

# Show floats with five decimals in pandas and without scientific notation in numpy.
pd.set_option('display.float_format', '{:.5f}'.format)
np.set_printoptions(suppress=True)
print("done")

%load_ext autoreload
%autoreload 2

# Retrieve data

## get data

In [None]:
# raw_data_folder_path = "all_monkey_data/raw_monkey_data/monkey_Bruno/data_0330"

In [None]:
# Session folder to analyse. The path is relative, so it is resolved against
# the current working directory.
raw_data_folder_path = "all_monkey_data/raw_monkey_data/monkey_Schro/data_0416"

In [None]:
# Explicitly re-import the module before its class is instantiated in the next cell.
reload(planning_and_neural_class)

In [None]:
# Flags forwarded to the data-preparation calls below.
# NOTE(review): the *_exists_ok names suggest "reuse previously saved output
# if present" — confirm against PlanningAndNeural.
reduce_y_var_lags = True
planning_data_by_point_exists_ok = True
y_data_exists_ok = True

# Build the analysis object for the selected session folder.
pn = planning_and_neural_class.PlanningAndNeural(raw_data_folder_path=raw_data_folder_path)
pn.prep_data_to_analyze_planning(planning_data_by_point_exists_ok=planning_data_by_point_exists_ok)
# Replace the by-point table with the helper's first return value; the second
# return value (cols_to_drop) is not used again in this cell.
pn.planning_data_by_point, cols_to_drop = general_utils.drop_columns_with_many_nans(
    pn.planning_data_by_point)
# NOTE(review): presumably this populates pn.x_var / pn.y_var / pn.y_var_reduced,
# which later cells read — confirm.
pn.get_x_and_y_data_for_modeling(exists_ok=y_data_exists_ok, reduce_y_var_lags=reduce_y_var_lags)

In [None]:
# pn.test_inst.planning_data_by_point.columns.to_list()

# PGAM

Code is from https://github.com/BalzaniEdoardo/PGAM

I might need to run this on Google Colab, or look into compute clusters — or maybe a Mac Pro is enough?

In [None]:
import numpy as np
import sys
from PGAM.GAM_library import *
import PGAM.gam_data_handlers as gdh
import matplotlib.pylab as plt
import pandas as pd
from post_processing import postprocess_results
from scipy.io import savemat

## individual steps

### check for NA

In [None]:
# Check pn.x_var for missing values; the helper is asked to return the
# affected rows and columns as well.
general_utils.check_na_in_df(pn.x_var, df_name="DataFrame", return_rows_and_columns=True)

In [None]:
# Same missing-value check for pn.y_var_reduced.
# NOTE(review): the PGAM object in the next section is built from pn.y_var,
# not pn.y_var_reduced — confirm which table is meant to be checked.
general_utils.check_na_in_df(pn.y_var_reduced, df_name="DataFrame", return_rows_and_columns=True)

### categorize variables

In [None]:
# Keep only the x_var columns whose name starts with 'cluster_' and build the
# PGAM wrapper from them, together with y_var, the bin width and the folder path.
cluster_cols = [name for name in pn.x_var if name.startswith('cluster_')]
pgam_inst = pgam_class.PGAMclass(
    pn.x_var[cluster_cols],
    pn.y_var,
    pn.bin_width,
    pn.processed_neural_data_folder_path,
)

In [None]:
# Preparation step, called with num_total_trials=10.
# NOTE(review): presumably this caps how many trials are used — confirm in PGAMclass.
pgam_inst.prepare_for_pgam(num_total_trials=10)

### temporal kernel

Modified from PGAM_Tutorial.ipynb.

In [None]:
# Calls an underscore-prefixed (non-public) method directly so this step can be run on its own.
pgam_inst._add_temporal_features_to_model()

### spatial variable

In [None]:
# Calls an underscore-prefixed (non-public) method directly so this step can be run on its own.
pgam_inst._add_spatial_features_to_model()

# Iterate

In [None]:
# Summary of the x_var table held by pgam_inst (assumes a pandas DataFrame — TODO confirm).
pgam_inst.x_var.describe()

In [None]:
# For each neural cluster from index 7 onward: reuse saved results when they
# can be loaded, otherwise run, post-process, plot and save.
# NOTE(review): the start index 7 is hard-coded — presumably a resume point
# from an earlier run; confirm before re-running from scratch.
for neural_cluster_number in range(7, pn.x_var.shape[1]):
    # Guard only the load itself, so that a failure while plotting loaded
    # results is not misreported as a load error and does not trigger a refit.
    try:
        pgam_inst.load_pgam_results(neural_cluster_number)
    except Exception as e:
        print(f"Error occurred while loading results: {e}")
    else:
        print('loaded results for neural_cluster_number: ', neural_cluster_number)
        pgam_inst.plot_results(plot_vars_in_reduced_list_only=True)
        continue
    # No usable saved results: run this cluster now.
    print('neural_cluster_number: ', neural_cluster_number)
    pgam_inst.run_pgam(neural_cluster_number=neural_cluster_number)
    pgam_inst.post_processing_results()
    pgam_inst.plot_results(plot_vars_in_reduced_list_only=True)
    pgam_inst.save_results()

In [None]:
# for neural_cluster_number in range(pn.x_var.shape[1]):
#     print('neural_cluster_number: ', neural_cluster_number)
#     pgam_inst.run_pgam(neural_cluster_number=neural_cluster_number)
#     pgam_inst.post_processing_results()
#     pgam_inst.plot_results(plot_vars_in_reduced_list_only=True)
#     pgam_inst.save_results()

### run

In [None]:
# Run PGAM for a single neural cluster (index 5).
pgam_inst.run_pgam(neural_cluster_number=5)

### post-processing

In [None]:
# Inspect the shape of pgam_inst.res.
pgam_inst.res.shape

In [None]:
# Post-processing step for the single-cluster run.
pgam_inst.post_processing_results()

In [None]:
# Plot the results, with the plot_vars_in_reduced_list_only flag switched on.
pgam_inst.plot_results(plot_vars_in_reduced_list_only=True)

### save results

In [None]:
# Save the results currently held by pgam_inst.
pgam_inst.save_results()

In [None]:
# Deliberate barrier: abort "Run All" here so the sweep over all neurons below
# only runs when started by hand. An explicit raise keeps the file parseable.
raise RuntimeError("Intentional stop: run the cells below manually.")

## iterate through all neurons

In [None]:
# Rebuild the PGAM wrapper from the full pn.x_var (no 'cluster_' column filter here).
pgam_inst = pgam_class.PGAMclass(pn.x_var, pn.y_var, pn.bin_width, pn.processed_neural_data_folder_path)

In [None]:
# Call streamline_pgam once per column index of pn.x_var, printing progress first.
# NOTE(review): presumably streamline_pgam bundles the individual steps shown
# earlier in the notebook — confirm in PGAMclass.
for i in range(pn.x_var.shape[1]):
    print(f'neural_cluster_number: {i} out of {pn.x_var.shape[1]}')
    pgam_inst.streamline_pgam(neural_cluster_number=i, num_total_trials=10)