In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt                         # library used to display graphs
import seaborn as sns   

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the event labels and relabel the columns positionally, so that the
# last two columns are called "eventstep" and "eventtimestamp".
event_column_names = ["series_id", "night", "event", "eventstep", "eventtimestamp"]
dfe = pd.read_csv(
    "/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv"
).set_axis(event_column_names, axis=1)
dfe

In [None]:
# Keep only the rows in which every column has a value.
dfe = dfe[dfe.notna().all(axis=1)]
dfe

In [None]:
# Index the events by their timestamp column.
dfe = dfe.set_index("eventtimestamp")
dfe

In [None]:
# Distinct series ids that still have events, in order of first appearance.
actors = pd.unique(dfe["series_id"])
len(actors)

In [None]:
# Work on an independent copy of the events belonging to the first series.
first_actor_mask = dfe["series_id"].eq(actors[0])
df_tampon = dfe.loc[first_actor_mask].copy()
df_tampon

In [None]:
# Encode the event labels as integers: onset -> 0, wakeup -> 1.
# The result is assigned back rather than calling replace(inplace=True) on the
# selected column: that chained in-place form acts on a temporary object under
# pandas Copy-on-Write and leaves df_tampon unchanged.
# infer_objects() turns the replaced column into an integer dtype when every
# label was replaced.
df_tampon["event"] = df_tampon["event"].replace({"onset": 0, "wakeup": 1}).infer_objects()

In [None]:
# Display the first series' events after the event-label encoding cell.
df_tampon

In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [None]:
train_series_file = "/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet"

# Column names and Arrow types used when the series file is rewritten.
# These types are a first pass and could be tightened.
parquet_schema = pa.schema(
    [
        pa.field("series_id", pa.string()),
        pa.field("step", pa.int32()),
        pa.field("timestamp", pa.string()),
        pa.field("anglez", pa.float32()),
        pa.field("enmo", pa.float32()),
    ]
)

In [None]:
def write_data(input_path, output_path, parquet_schema):
    """Rewrite a Parquet file with one ``write_table`` call per ``series_id``.

    The whole input file is read into a pandas DataFrame, grouped by
    ``series_id``, and each group is converted to an Arrow table with
    ``parquet_schema`` and appended to ``output_path``.

    Args:
        input_path: Path of the Parquet file to read.
        output_path: Path of the Parquet file to create.
        parquet_schema: ``pyarrow.Schema`` applied to every group and to the
            output file.
    """
    data = pq.ParquetFile(input_path).read().to_pandas()

    # The context manager closes the writer even when a group fails to convert,
    # so the output file handle is not leaked.
    with pq.ParquetWriter(output_path, parquet_schema) as writer:
        for _series_id, group in tqdm(data.groupby("series_id")):
            table = pa.Table.from_pandas(group, schema=parquet_schema)
            # Pin the row-group size to the table length so a long series is
            # not split into several row groups by the writer's default cap.
            # Readers in this notebook address a series by row-group position.
            writer.write_table(table, row_group_size=table.num_rows or None)


In [None]:
# Regroup the training series into a Parquet file in the working directory.
write_data(
    input_path=train_series_file,
    output_path="train_series_regrouped.parquet",
    parquet_schema=parquet_schema,
)

In [None]:
# Open the regrouped Parquet file and report how many row groups it contains.
regrouped_path = "train_series_regrouped.parquet"
regrouped_file = pq.ParquetFile(regrouped_path)
num_row_groups = regrouped_file.metadata.num_row_groups
print(f"Number of groups: {num_row_groups}")

In [None]:
def get_series(series_id):
    """Read one row group of ``regrouped_file`` into a pandas DataFrame.

    Despite its name, ``series_id`` is passed straight to ``read_row_group``,
    so it is the position of a row group in the file, not a series id string.
    """
    return regrouped_file.read_row_group(series_id).to_pandas()
def prepdf(i, freq="15min"):
    """Load one row group and down-sample it to fixed-width time bins.

    Args:
        i: Row-group position passed to ``get_series``.
        freq: Resampling frequency. The default of 15 minutes equals the
            previous hard-coded 900 seconds.

    Returns:
        DataFrame with one row per bin and the columns ``timestamp``,
        ``series_id`` and ``step`` (first value in the bin), and ``anglez``
        and ``enmo`` (bin mean, then standardised with a ``StandardScaler``
        fitted on this frame alone).
    """
    df = get_series(i)
    # Parse with utc=True so that timestamps carrying different UTC offsets
    # still produce a single datetime column that resample() accepts; the
    # timezone is then dropped, leaving tz-naive UTC times.
    # NOTE(review): the previous code computed a tz-stripped series and threw
    # the result away, so the intent was presumably tz-naive timestamps --
    # confirm that UTC rather than local wall time is acceptable downstream.
    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True).dt.tz_localize(None)
    df = (
        df.set_index('timestamp')
        .resample(freq)
        .agg({
            'series_id': 'first',
            'step': 'first',
            'anglez': 'mean',
            'enmo': 'mean',
        })
        .reset_index()
    )
    feature_cols = ['anglez', 'enmo']
    df[feature_cols] = StandardScaler().fit_transform(df[feature_cols])
    return df

# Smoke-test the preprocessing on the row group at position 1.
df = prepdf(i=1)
    
    
    

In [None]:
# Reload the events from disk, discard incomplete rows, and keep only the
# columns other than "night" and "timestamp".
dfe = (
    pd.read_csv("/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv")
    .dropna()
    .drop(columns=["night", "timestamp"])
)

In [None]:
def getev(df, dfe, actor, bin_steps=180):
    """Attach the events of one series to its resampled sensor frame.

    Args:
        df: Sensor frame with a ``step`` column (as returned by ``prepdf``).
        dfe: Events frame with ``series_id``, ``step`` and ``event`` columns.
            It is not modified.
        actor: Value of ``series_id`` whose events are selected.
        bin_steps: Event steps are rounded to the nearest multiple of this
            value before merging. Exact halves round to the even multiple,
            as with Python's ``round``.
            NOTE(review): 180 presumably matches the 15-minute bins produced
            by ``prepdf`` -- confirm against the sampling interval.

    Returns:
        Inner merge of ``df`` and the selected events on ``step``. Columns
        present in both frames (such as ``series_id``) receive pandas'
        default ``_x`` / ``_y`` suffixes.
    """
    events = dfe[dfe["series_id"] == actor].copy()
    # Assign the encoded labels back instead of replace(inplace=True) on the
    # selected column: the chained in-place form acts on a temporary object
    # under pandas Copy-on-Write and would leave the labels as strings.
    events["event"] = events["event"].replace({"onset": 0, "wakeup": 1}).infer_objects()
    events["step"] = ((events["step"] / bin_steps).round() * bin_steps).astype("int64")
    return df.merge(events, on=["step"])

In [None]:
# Build the training table from a fixed sample of row groups, attaching to
# each loaded frame the events of the series it actually contains.
actors = dfe["series_id"].unique()

# Row groups 0..16 without 7: the same positions the earlier counter-based
# loop loaded.
# NOTE(review): the notebook does not record why group 7 is skipped -- confirm.
sample_row_groups = [g for g in range(17) if g != 7]

frames = []
# The earlier loop ran once per entry of `actors`, so keep that upper bound.
for row_group in sample_row_groups[:len(actors)]:
    df = prepdf(row_group)
    # Take the series id from the loaded data instead of from the position in
    # `actors`. Positional pairing drifts by one after the skipped group, and
    # the merge is on "step" only, so it would attach one series' events to
    # another series' signal without any error.
    actor = df["series_id"].dropna().iloc[0]
    frames.append(getev(df, dfe, actor))

# Concatenate once instead of growing the frame inside the loop. Empty merges
# are left out unless every merge was empty, which keeps the column layout.
non_empty = [frame for frame in frames if not frame.empty]
dfs = pd.concat(non_empty or frames, ignore_index=True) if frames else pd.DataFrame()
dfs

In [None]:
# Drop the series id column that the merge brought in from the events frame.
dfs = dfs.drop(columns="series_id_y")
dfs

In [None]:
# Draw both signals against the step on one set of axes, coloured by event
# and styled by series.
fig, ax = plt.subplots(figsize=(12, 6))
for signal in ('anglez', 'enmo'):
    sns.lineplot(
        data=dfs, x='step', y=signal, hue='event', style='series_id_x',
        markers=True, dashes=False, ax=ax,
    )
ax.set_xlabel('Step')
ax.set_ylabel('Valeurs')
ax.set_title('Variation de Anglez et Enmo en fonction du nombre de step (Couleur selon Event et Series ID)')
ax.legend(title='Event')
fig.tight_layout()
plt.show()

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.svm import LinearSVC

In [None]:
# Separate the target ("event") from the features and hold out 20% of the rows.
dfc = dfs.copy()
Y = dfc["event"]
non_feature_cols = ["timestamp", "series_id_x", "step", "event"]
X = dfc.drop(columns=non_feature_cols)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

In [None]:
# Fit a linear SVM on copies of the training split, then show the predictions
# on the held-out split and the confusion matrix normalised over true labels.
Xtsvc = Xtrain.copy()
Ytsvc = Ytrain.copy()
svc = LinearSVC(dual=True).fit(Xtsvc, Ytsvc)
ypredsvc = svc.predict(Xtest)
print(ypredsvc)
cnfMatSvc = metrics.confusion_matrix(y_true=Ytest, y_pred=ypredsvc, normalize="true")
cnfMatSvc

In [None]:
# Load the test series and build the feature frame by removing the
# timestamp, series id and step columns.
testeur = pd.read_parquet(
    "/kaggle/input/child-mind-institute-detect-sleep-states/test_series.parquet"
)
xtesteur = testeur.drop(columns=["timestamp", "series_id", "step"])


In [None]:
# Standardise the two test features with a scaler fitted on the test data itself.
feature_cols = ['anglez', 'enmo']
scaler = StandardScaler()
scaler.fit(xtesteur[feature_cols])
xtesteur[feature_cols] = scaler.transform(xtesteur[feature_cols])
xtesteur

In [None]:
# Predict an event class for every row of the scaled test features.
ytesteur = svc.predict(X=xtesteur)
ytesteur

In [None]:
# Display the test series frame before the predictions are attached to it.
testeur

In [None]:
# Append the predictions as an extra column (labelled 0), then keep only the
# columns other than timestamp, anglez and enmo.
predicted = pd.DataFrame(ytesteur)
testeur = pd.concat([testeur, predicted], axis=1)
sub = testeur.drop(columns=["timestamp", "anglez", "enmo"]).copy()
sub

In [None]:
# Name the prediction column, decode the classes back to their labels, and
# add a constant score column.
event_names = {0: "onset", 1: "wakeup"}
sub = sub.rename(columns={0: "event"})
print(sub)
sub["event"] = sub["event"].replace(event_names)
subdf = sub.assign(score=1.0)
subdf

In [None]:
# Keep only the rows where the predicted event differs from the previous row
# of the same series. Shifting within each series_id stops the last row of one
# series from suppressing the first row of the next series.
previous_event = subdf.groupby("series_id")["event"].shift()
# reset_index returns a new frame, which avoids an in-place change on a
# filtered slice of subdf.
subfilter = subdf[subdf["event"] != previous_event].reset_index(drop=True)
subfilter

In [None]:
# Write the submission with the index as a named id column instead of an
# unnamed first column.
# NOTE(review): "row_id" is the id column name in the competition's sample
# submission as far as I recall -- confirm against sample_submission.csv.
subfilter.to_csv("submission.csv", index_label="row_id")