In [1]:
import os
import json
import kaggle
import random
import requests
import subprocess
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import VotingClassifier
from lightgbm import LGBMClassifier

pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 20)
pd.set_option('display.width', 1000)
sns.set_style('white')

def get_files(path="./data", extension=".csv"):
    return [file for file in os.listdir(path) if file[-4:] == extension]

def clean(folder_path="analysis/", extension=".png"):
    for file in os.listdir(folder_path):
        print(f"Removing {file}.")
        os.remove(os.path.join(folder_path, file))

print(get_files())
clean()

['my_submission.csv', 'sample_submission.csv', 'test.csv', 'train.csv']


In [2]:
def extract_data(file_path="data/train.csv", output_column=["Transported"]):
    df = pd.read_csv(
        filepath_or_buffer=file_path,
        sep=",",
        usecols=[
            "PassengerId",
            "HomePlanet",
            "CryoSleep",
            "Cabin",
            "Destination",
            "Age",
            "VIP",
            "RoomService",
            "FoodCourt",
            "ShoppingMall",
            "Spa",
            "VRDeck",
        ] + output_column,
    )
    print(f"Loading DataFrame with size: {df.shape}.")
    if df["PassengerId"].isnull().sum() > 0:
        raise ValueError("PassengerId null values can not be Greater than 0.")
    for column in df.columns:
        print(f"The type of the columns {column} is {df[column].dtype}.")
    return df.set_index("PassengerId")


df = extract_data()
df_test = extract_data(file_path="data/test.csv", output_column=[])
df = pd.concat([df, df_test])
print("\n", df.sample(20))

Loading DataFrame with size: (8693, 13).
The type of the columns PassengerId is object.
The type of the columns HomePlanet is object.
The type of the columns CryoSleep is object.
The type of the columns Cabin is object.
The type of the columns Destination is object.
The type of the columns Age is float64.
The type of the columns VIP is object.
The type of the columns RoomService is float64.
The type of the columns FoodCourt is float64.
The type of the columns ShoppingMall is float64.
The type of the columns Spa is float64.
The type of the columns VRDeck is float64.
The type of the columns Transported is bool.
Loading DataFrame with size: (4277, 12).
The type of the columns PassengerId is object.
The type of the columns HomePlanet is object.
The type of the columns CryoSleep is object.
The type of the columns Cabin is object.
The type of the columns Destination is object.
The type of the columns Age is float64.
The type of the columns VIP is object.
The type of the columns RoomService i

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [3]:
def nan_analysis(df):
    print("\n-------- NAN Analysis --------")
    for col in df.columns:
        nulls = df[col].isnull().sum()
        print(f"For columns: {col}, there are {nulls/df.shape[0]:.2f}% null or {nulls} values.")
    print("--------------------------------\n")

def HomePlanet_analysis(df, analysis_folder="analysis", column="HomePlanet"):
    if not os.path.exists(analysis_folder):
        os.mkdir(analysis_folder)
    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(34, 18))
    for index, hue in enumerate(["VIP", "Destination"]):
        sns.catplot(
            data=df,
            x=column,
            y="Transported",
            kind="bar",
            hue=hue,
            ax=axs[index],
        )
        axs[index].set_title(f"{column}_hue_{hue}", fontsize=28)
        axs[index].set_xlabel(column, fontsize=28)
        axs[index].set_ylabel("Transported", fontsize=28)
    fig.savefig(os.path.join(analysis_folder, f"{column}.png"))                
    for ax in axs:
        ax.remove()
    plt.close()


def Cabin_analysis(df, analysis_folder="analysis", column="Cabin"):
    if not os.path.exists(analysis_folder):
        os.mkdir(analysis_folder)
    print("Creating new column Cabin_deck.")
    df["Cabin_deck"] = df[column].str[0]
    print("Creating new column Cabin_num.")
    df["Cabin_num"] = df[column].str[2:-2]
    print("Creating new column Cabin_side.")
    df["Cabin_side"] = df[column].str[-1]
    for index, hue in enumerate(["VIP", "Destination", "CryoSleep"]):
        print(f"Creating Cabin_deck/{hue} catplot.")
        fig = plt.figure(figsize=(300, 100))
        fig = sns.catplot(
            data=df,
            x="Cabin_deck",
            y="Transported",
            kind="bar",
            hue=hue,
        )
        fig.savefig(os.path.join(analysis_folder, f"{hue}_Cabin_deck.png"))
    for index, hue in enumerate(["VIP", "Destination", "CryoSleep"]):
        print(f"Creating Cabin_side/{hue} catplot.")
        fig = plt.figure(figsize=(300, 100))
        fig = sns.catplot(
            data=df,
            x="Cabin_side",
            y="Transported",
            kind="bar",
            hue=hue,
        )
        fig.savefig(os.path.join(analysis_folder, f"{hue}_Cabin_side.png"))
    plt.close()


def Age_analysis(df, analysis_folder="analysis", column="Age"):
    if not os.path.exists(analysis_folder):
        os.mkdir(analysis_folder)
    print("Creating new columns for Age_group.")
    df["Age_group"] = "mean"
    df.loc[df["Age"].between(0, 10), "Age_group"] = "0-10"
    df.loc[df["Age"].between(10, 20), "Age_group"] = "10-20"
    df.loc[df["Age"].between(20, 30), "Age_group"] = "20-30"
    df.loc[df["Age"].between(30, 40), "Age_group"] = "30-40"
    df.loc[df["Age"].between(40, 50), "Age_group"] = "40-50"
    df.loc[df["Age"].between(50, 60), "Age_group"] = "50-60"
    df.loc[df["Age"].between(60, 70), "Age_group"] = "60-70"
    df.loc[df["Age"].ge(70), "Age_group"] = ">70"
    for index, hue in enumerate(["VIP", "Destination", "CryoSleep", "HomePlanet", "Cabin_deck", "Cabin_side"]):
        print(f"Creating Age_group/{hue} catplot.")
        fig = plt.figure(figsize=(300, 100))
        fig = sns.catplot(
            data=df,
            x="Age_group",
            y="Transported",
            kind="bar",
            hue=hue,
        )
        fig.savefig(os.path.join(analysis_folder, f"{hue}_Age_group.png"))


def CryoSleep_analysis(df, analysis_folder="analysis", column="CryoSleep"):
    if not os.path.exists(analysis_folder):
        os.mkdir(analysis_folder)
    for index, hue in enumerate(["VIP", "Destination", "HomePlanet", "Cabin_deck", "Cabin_side"]):
        print(f"Creating CryoSleep/{hue} catplot.")
        fig = plt.figure(figsize=(300, 100))
        fig = sns.catplot(
            data=df,
            x="CryoSleep",
            y="Transported",
            kind="bar",
            hue=hue,
        )
        fig.savefig(os.path.join(analysis_folder, f"{hue}_Age_group.png"))

def fill_nan_values1(df):
    new_df = df.copy()
    new_df.loc[(new_df["HomePlanet"] == "Earth") & (new_df["VIP"].isnull()), "VIP"] = "False"
    new_df.loc[(new_df["Cabin_deck"].isin(["G", "T"])) & (new_df["VIP"].isnull()), "VIP"] = "False"
    new_df.loc[new_df["Age"].isnull(), "Age"] = df["Age"].mean()
    new_df.loc[new_df["Age_group"] == "0-10", "VIP"] = "False"
    return new_df

def fill_nan_values2(df):
    new_df = df.copy()
    new_df["HomePlanet"].fillna(new_df["HomePlanet"].mode()[0], inplace=True)
    new_df["CryoSleep"].fillna(new_df["CryoSleep"].mode()[0], inplace=True)
    new_df["Destination"].fillna(new_df["Destination"].mode()[0], inplace=True)
    new_df["VIP"].fillna(new_df["VIP"].mode()[0], inplace=True)
    new_df["Age"].fillna(new_df["Age"].mean(), inplace=True)
    new_df["RoomService"].fillna(new_df["RoomService"].mean(), inplace=True)
    new_df["FoodCourt"].fillna(new_df["FoodCourt"].mean(), inplace=True)
    new_df["ShoppingMall"].fillna(new_df["ShoppingMall"].mean(), inplace=True)
    new_df["Spa"].fillna(new_df["Spa"].mean(), inplace=True)
    new_df["VRDeck"].fillna(new_df["VRDeck"].mean(), inplace=True)
    return new_df

def adding_features(df):
    new_df = df.copy()
    new_df["total_amount_spent"] = (
        new_df["RoomService"] +
        new_df["FoodCourt"] +
        new_df["ShoppingMall"] +
        new_df["Spa"] +
        new_df["VRDeck"]
    )
    new_df.loc[new_df["Age"].between(0, 10), "Age_group"] = "0-10"
    new_df.loc[new_df["Age"].between(10, 20), "Age_group"] = "10-20"
    new_df.loc[new_df["Age"].between(20, 30), "Age_group"] = "20-30"
    new_df.loc[new_df["Age"].between(30, 40), "Age_group"] = "30-40"
    new_df.loc[new_df["Age"].between(40, 50), "Age_group"] = "40-50"
    new_df.loc[new_df["Age"].between(50, 60), "Age_group"] = "50-60"
    new_df.loc[new_df["Age"].between(60, 70), "Age_group"] = "60-70"
    new_df.loc[new_df["Age"].ge(70), "Age_group"] = ">70"
    new_df["Cabin_deck"] = new_df["Cabin"].str[0]
    new_df["Cabin_side"] = new_df["Cabin"].str[-1]
    new_df["Cabin_deck"].fillna(new_df["Cabin_deck"].mode()[0], inplace=True)
    new_df["Cabin_side"].fillna(new_df["Cabin_side"].mode()[0], inplace=True)
    new_df.drop(columns=["Cabin"], inplace=True)
    return new_df

nan_analysis(df)
new_df = fill_nan_values2(df)
ninja = adding_features(new_df)
nan_analysis(ninja)


-------- NAN Analysis --------
For columns: Age, there are 0.02% null or 270 values.
For columns: Cabin, there are 0.02% null or 299 values.
For columns: CryoSleep, there are 0.02% null or 310 values.
For columns: Destination, there are 0.02% null or 274 values.
For columns: FoodCourt, there are 0.02% null or 289 values.
For columns: HomePlanet, there are 0.02% null or 288 values.
For columns: RoomService, there are 0.02% null or 263 values.
For columns: ShoppingMall, there are 0.02% null or 306 values.
For columns: Spa, there are 0.02% null or 284 values.
For columns: Transported, there are 0.33% null or 4277 values.
For columns: VIP, there are 0.02% null or 296 values.
For columns: VRDeck, there are 0.02% null or 268 values.
--------------------------------


-------- NAN Analysis --------
For columns: Age, there are 0.00% null or 0 values.
For columns: CryoSleep, there are 0.00% null or 0 values.
For columns: Destination, there are 0.00% null or 0 values.
For columns: FoodCourt, th

In [4]:
def transform_data(df):
    new_df = df.copy()
    to_encode = ["HomePlanet", "CryoSleep", "Destination", "VIP", "Age_group", "Cabin_deck", "Cabin_side"]
    to_scale = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck", "total_amount_spent"]
    encoders, scalers = {}, {}
    for column in to_encode:
        enc = LabelEncoder()
        print(f"Enconding column: {column} with LabelEncoder.")
        if new_df[column].isnull().sum() > 0:
            print(f"Skipping for column: {column}. There are still nan values.")
            continue
        new_df[column] = enc.fit_transform(new_df[column])
        encoders[column] = enc
    for column in to_scale:
        sc = MinMaxScaler()
        print(f"Scaling column: {column} with MinMaxScaler.")
        new_df[column] = sc.fit_transform(new_df[column].values.reshape(-1, 1))
        scalers[column] = sc
    return new_df

transformed_df = transform_data(ninja)
print("\n", transformed_df.sample(5))

Enconding column: HomePlanet with LabelEncoder.
Enconding column: CryoSleep with LabelEncoder.
Enconding column: Destination with LabelEncoder.
Enconding column: VIP with LabelEncoder.
Enconding column: Age_group with LabelEncoder.
Enconding column: Cabin_deck with LabelEncoder.
Enconding column: Cabin_side with LabelEncoder.
Scaling column: Age with MinMaxScaler.
Scaling column: RoomService with MinMaxScaler.
Scaling column: FoodCourt with MinMaxScaler.
Scaling column: ShoppingMall with MinMaxScaler.
Scaling column: Spa with MinMaxScaler.
Scaling column: VRDeck with MinMaxScaler.
Scaling column: total_amount_spent with MinMaxScaler.

                   Age  CryoSleep  Destination  FoodCourt  HomePlanet  RoomService  ShoppingMall       Spa Transported  VIP  VRDeck  total_amount_spent  Age_group  Cabin_deck  Cabin_side
PassengerId                                                                                                                                                               

In [5]:
X = transformed_df[~transformed_df["Transported"].isnull()].drop(columns=["Transported"])
Y = transformed_df[~transformed_df["Transported"].isnull()]["Transported"]
enc = LabelEncoder()
Y = enc.fit_transform(Y)
to_predict = transformed_df[transformed_df["Transported"].isnull()]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.15, random_state=42)

In [6]:
lda = LinearDiscriminantAnalysis(n_components=12)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)



In [7]:
# RandomForest
hyper_params_rf = {
    "n_estimators": [50, 100, 200, 300, 400, 500, 600, 700, 800],
    "max_depth": [1, 2, 3, 5, 10, 15, 20],
}
hyper_params_xgb = {
    "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
    "gamma": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    "max_depth ": [0, 1, 2, 3, 4, 5, 6, 10],
}
hyper_params_cb = {
    "n_estimators": [50, 100, 150, 200, 300, 400, 500, 600],
    "max_depth": [2, 4, 8, 12, 15, 20],
    "learning_rate": [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5],
}
hyper_param_lgmb = {
    "num_leaves": [10, 15, 25, 35, 45, 55, 85, 100],
    "max_depth": [-1, 2, 4, 8, 10, 12, 15, 20, 25],
    "learning_rate": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
}

model_lgbm = LGBMClassifier()
model_rf = RandomForestClassifier(verbose=0)
model_xgb = xgb.XGBClassifier(verbose=False)
model_cb = CatBoostClassifier(verbose=False)

rand_search_rf = RandomizedSearchCV(
    model_rf,
    param_distributions=hyper_params_rf,
    n_iter=3,
    cv=5,
)
rand_search_xgb = RandomizedSearchCV(
    model_xgb,
    param_distributions=hyper_params_xgb,
    n_iter=3,
    cv=5,
)
rand_search_cat = RandomizedSearchCV(
    model_cb,
    param_distributions=hyper_params_cb,
    n_iter=3,
    cv=5,
)
rand_search_lgmb = RandomizedSearchCV(
    model_lgbm,
    param_distributions=hyper_param_lgmb,
    n_iter=3,
    cv=5,
)

# Fit the random search object to the data
rand_search_xgb.fit(X_train, y_train)
# Create a variable for the best model
best_xgb = rand_search_xgb.best_estimator_
# Fit the random search object to the data
rand_search_rf.fit(X_train, y_train)
# Create a variable for the best model
best_rf = rand_search_rf.best_estimator_
# Fit the random search object to the data
rand_search_cat.fit(X_train, y_train)
# Create a variable for the best model
best_cb = rand_search_cat.best_estimator_
# Fit the random search object to the data
rand_search_lgmb.fit(X_train, y_train)
# Create a variable for the best model
best_cb = rand_search_lgmb.best_estimator_
# Print the best hyperparameters
print(f"Best hyperparameters for RF: {rand_search_rf.best_params_}.")
# Print the best hyperparameters
print(f"Best hyperparameters for XGB: {rand_search_xgb.best_params_}.")
# Print the best hyperparameters
print(f"Best hyperparameters for CAT: {rand_search_cat.best_params_}.")
# Print the best hyperparameters
print(f"Best hyperparameters for LGMB: {rand_search_lgmb.best_params_}.")

KeyboardInterrupt: 

In [None]:
model = LGBMClassifier(num_leaves=100, max_depth=20, learning_rate=0.5)
model.fit(X_train, y_train)
# Predict values 
y_predict = model.predict(X_test)
# Score
accuracy = accuracy_score(y_test, y_predict)
print(f"Accuracy: {accuracy}.")
precision = precision_score(y_test, y_predict)
print(f"Precision: {precision}.")
recall = recall_score(y_test, y_predict)
print(f"Recall: {recall}.")

In [None]:
model = RandomForestClassifier(
    n_estimators=600,
    max_depth=15,
    criterion="gini",
    verbose=1,
)
# Train the model with train data
model.fit(X_train, y_train)
# Predict values 
y_predict = model.predict(X_test)
# Score
accuracy = accuracy_score(y_test, y_predict)
print(f"Accuracy: {accuracy}.")
precision = precision_score(y_test, y_predict)
print(f"Precision: {precision}.")
recall = recall_score(y_test, y_predict)
print(f"Recall: {recall}.")

In [None]:
model_xgb = xgb.XGBClassifier(
    max_depth=4,
    learning_rate=0.4,
    gamma=0.6,
    verbose=True,
)


# Fit the random search object to the data
model_xgb.fit(X_train, y_train)
# Predict values 
y_predict = model.predict(X_test)
# Score
accuracy = accuracy_score(y_test, y_predict)
print(f"Accuracy: {accuracy}.")
precision = precision_score(y_test, y_predict)
print(f"Precision: {precision}.")
recall = recall_score(y_test, y_predict)
print(f"Recall: {recall}.")

In [None]:
model_cat = CatBoostClassifier(
    n_estimators=400,
    max_depth=8,
    learning_rate=0.2,
    verbose=True
)


# Fit the random search object to the data
model_cat.fit(X_train, y_train)
# Predict values 
y_predict = model_cat.predict(X_test)
# Score
accuracy = accuracy_score(y_test, y_predict)
print(f"Accuracy: {accuracy}.")
precision = precision_score(y_test, y_predict)
print(f"Precision: {precision}.")
recall = recall_score(y_test, y_predict)
print(f"Recall: {recall}.")

In [8]:
# KFold & Ensemble learning
kfold = KFold(n_splits=20)
cat_model = CatBoostClassifier(n_estimators=400, max_depth=8, learning_rate=0.4, verbose=False)
xgb_model = xgb.XGBClassifier(max_depth=5, learning_rate=0.02, gamma=0.1, verbose=False)
rf_model = RandomForestClassifier(n_estimators=700, max_depth=2, verbose=0)
model_lgbm = LGBMClassifier(num_leaves=15, max_depth=12, learning_rate=0.05)

estimators = [("catboost", cat_model), ("XGBoost", xgb_model), ("RandomForest", rf_model), ("LGBM", model_lgbm)]
best_score, best_model = 0, None
for train_index, test_index in kfold.split(X):
    ensemble = VotingClassifier(estimators)
    x_train, x_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    ensemble.fit(x_train,y_train)
    new_score = ensemble.score(x_test, y_test)
    print(f"New_score: {new_score}")
    if new_score > best_score:
        best_score = new_score
        best_model = ensemble

New_score: 0.8022988505747126
New_score: 0.7839080459770115
New_score: 0.7425287356321839
New_score: 0.7954022988505747
New_score: 0.7563218390804598
New_score: 0.8
New_score: 0.7770114942528735
New_score: 0.8091954022988506
New_score: 0.7793103448275862
New_score: 0.8068965517241379
New_score: 0.7977011494252874
New_score: 0.8022988505747126
New_score: 0.8137931034482758
New_score: 0.8179723502304147
New_score: 0.783410138248848
New_score: 0.8456221198156681
New_score: 0.815668202764977
New_score: 0.8294930875576036
New_score: 0.8064516129032258
New_score: 0.7672811059907834


In [9]:
to_predict["Transported"] = best_model.predict(to_predict.drop(columns=["Transported"]))
to_predict["Transported"] = enc.inverse_transform(to_predict["Transported"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [14]:
def generate_submission_file_based_on_prediction(result, source_file_path="data/sample_submission.csv"):
    df = pd.read_csv(
        filepath_or_buffer=source_file_path,
        sep=",",
    ).set_index("PassengerId")
    print(f"Entry: {df.shape}.")
    merge_df = pd.merge(
        left=df,
        right=result,
        how="left",
        left_index=True,
        right_index=True,
    )
    print(f"Output: {merge_df.shape}.")
    if df.shape[0] != merge_df.shape[0]:
        raise ValueError(f"Should be same size.")
    merge_df = merge_df[["Transported_x", "Transported_y"]]
    merge_df = merge_df.reset_index().rename(columns={"Transported_y": "Transported"}).drop(columns=["Transported_x"])
    merge_df.to_csv("data/my_submission.csv", sep=",", index=False)
    
def generate_random_submission(source_file_path="data/sample_submission.csv"):
    df = pd.read_csv(
        filepath_or_buffer=source_file_path,
        sep=",",
    )
    df["Transported"] = df["Transported"].apply(lambda x: random.choice(["False", "True"]))
    print(f"File data/my_submission.csv successfully generated.\n")
    df.to_csv("data/my_submission.csv", sep=",", index=False)
    
def submit_submission(competition_id="2357", submission_file="data/my_submission.csv"):
    with open("kaggle.json") as credential:
        json_credential = json.loads(credential.read())
        os.environ["KAGGLE_USERNAME"] = json_credential["username"]
        os.environ["KAGGLE_KEY"] = json_credential["key"]
    result = subprocess.check_output(
        [
            "kaggle",
            "competitions",
            "submit",
            "spaceship-titanic",
            "-f",
            submission_file,
            "-m",
            f"{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: New submission",
        ]
    ).decode("utf-8")
    print(result)

def get_latest_score(competition_id="2357", team_id="10059555"):
    with open("kaggle.json") as credential:
        json_credential = json.loads(credential.read())
        os.environ["KAGGLE_USERNAME"] = json_credential["username"]
        os.environ["KAGGLE_KEY"] = json_credential["key"]
        os.environ["KAGGLE_TEAM_ID"] = team_id
    result = subprocess.check_output(["kaggle", "competitions", "submissions", "spaceship-titanic"]).decode("utf-8")
    print(result)

generate_submission_file_based_on_prediction(to_predict)
submit_submission()
# get_latest_score()

Entry: (4277, 1).
Output: (4277, 16).
Successfully submitted to Spaceship Titanic


In [15]:
get_latest_score()

fileName               date                 description                          status    publicScore  privateScore  
---------------------  -------------------  -----------------------------------  --------  -----------  ------------  
my_submission.csv      2023-08-23 18:44:21  2023-08-23 19:44:18: New submission  complete  0.79798                    
my_submission.csv      2023-03-24 11:21:24  2023-03-24 11:21:18: New submission  complete  0.79822                    
my_submission.csv      2023-03-24 10:40:46  2023-03-24 10:40:42: New submission  complete  0.79120                    
my_submission.csv      2023-03-24 10:36:54  2023-03-24 10:36:49: New submission  complete  0.79869                    
my_submission.csv      2023-03-24 10:32:32  2023-03-24 10:32:18: New submission  complete  0.79448                    
my_submission.csv      2023-03-17 15:44:11  2023-03-17 15:44:09: New submission  complete  0.79354                    
my_submission.csv      2023-03-17 15:38: