# This notebook goes through the procedure of MR and behavioral data processing.
## First - import packages and load data

In [3]:
# %pip install numpy pandas matplotlib seaborn scikit-learn meteostat
from pathlib import Path
import numpy as np
import pandas as pd

# All inputs are read from ../data, resolved against the current working directory.
data_directory = Path('../data')

# MRI table (provides the "session", "age", "height" and "weight" columns used below).
# NOTE: unpickling can execute arbitrary code - only load pickles from a trusted source.
mr_data = pd.read_pickle(data_directory.joinpath("mr_data.pickle"))

# Parcellation table; the first CSV column becomes the index.
parcels = pd.read_csv(data_directory.joinpath("BNA_with_cerebellum.csv"), index_col=0)


In [4]:
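# One-off conversion of mr_data.csv into the mr_data.pickle file loaded above; kept commented out for provenance.
# NOTE(review): as written, the column-renaming lines run before mr_data is read from the CSV -
# presumably they were executed after the read_csv line; verify the order before re-enabling this cell.
# cols = mr_data.columns.to_list()
# cols = [c if c not in parcels.Label.astype(str).values else int(c) for c in cols]
# mr_data.columns = cols
# mr_data = pd.read_csv(data_directory / 'mr_data.csv',index_col=0)
# mr_data["session"] = pd.to_datetime(mr_data["session"])
# mr_data[["height","weight","age"]] = mr_data[["height","weight","age"]].astype(float)
# mr_data.to_pickle(data_directory / 'mr_data.pickle')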

## Meteorological data
We'll use the [Meteostat](https://dev.meteostat.net/) package to extract [hourly data](https://dev.meteostat.net/python/hourly.html#data-structure) regarding weather and climate.

In [5]:
from meteostat import Point, Hourly

# Query window: from the earliest to the latest scan session
session_times = mr_data["session"]
start_date, end_date = session_times.min(), session_times.max()

# Geographic point for which the weather records are requested
tel_aviv = {'lon': 34.8, 'lat': 32.0833}
point = Point(lat=tel_aviv['lat'], lon=tel_aviv['lon'])

# Build the hourly query and download the records
# NOTE(review): Meteostat reports hourly timestamps in UTC unless a timezone is
# passed - confirm that mr_data["session"] uses the same time reference.
hourly_met_data = Hourly(point, start_date, end_date).fetch()

# Keep only the columns that contain at least one non-missing value
valid_met_columns = hourly_met_data.columns[hourly_met_data.notna().any()]
hourly_met_data = hourly_met_data[valid_met_columns]

# Replace remaining gaps with each column's median, then turn the index into a
# regular column (read as "time" further down) so rows get a 0..n-1 index.
# NOTE(review): the median is applied to every column alike, including the
# wdir and coco columns - confirm that this is intended for them.
hourly_met_data = hourly_met_data.fillna(hourly_met_data.median()).reset_index()

## Combine MRI, behavioral and meteorological data.

In [6]:
# Work on a copy so the MRI table loaded earlier is left untouched.
data = mr_data.copy()

# For every scan session, find the label of the hourly record closest in time.
# Only the "session" column is iterated (not whole rows, as iterrows() would),
# and idxmin() keeps the first record when two are equally close.
met_times = hourly_met_data["time"]
nearest_met_labels = [
    (session_time - met_times).abs().idxmin() for session_time in data["session"]
]

# Gather all matched records in one lookup and attach them by position.
# Writing whole columns (instead of data.loc[i, ...] per row) avoids cell-by-cell
# enlargement of the frame and stays correct even if data's index labels repeat.
matched_met = hourly_met_data.loc[nearest_met_labels, valid_met_columns].astype(float)
for met_col in valid_met_columns:
    data[met_col] = matched_met[met_col].to_numpy()

# Calendar features derived from the session timestamp
data["day"] = data["session"].dt.day
data["hour"] = data["session"].dt.hour
data["dayofweek"] = data["session"].dt.dayofweek

In [7]:
from sklearn import preprocessing

# Subject, calendar and weather columns that each get a standardised
# "<name>_scaled" companion column; the originals are kept as they are.
columns_to_scale = ["age", "height", "weight", "hour", "dayofweek", "day", *valid_met_columns]
for column_name in columns_to_scale:
    print(f"Scaling {column_name}")
    scaled_values = preprocessing.scale(data[column_name].to_numpy())
    data[f"{column_name}_scaled"] = scaled_values

Scaling age
Scaling height
Scaling weight
Scaling hour
Scaling dayofweek
Scaling day
Scaling temp
Scaling dwpt
Scaling rhum
Scaling prcp
Scaling wdir
Scaling wspd
Scaling wpgt
Scaling pres
Scaling coco


In [8]:
# Persist the combined table (MRI data, matched weather, calendar and scaled columns)
combined_path = data_directory.joinpath('data_combined.pickle')
data.to_pickle(combined_path)