In [8]:
import pandas as pd
import numpy as np
import os
pd.set_option('display.max_columns', None)
from datetime import datetime
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold
from sklearn.model_selection import KFold
from catboost import CatBoostClassifier,CatBoostRegressor


In [9]:
path = "C:/Users/HP/Desktop/fugo/"

os.listdir(path)

def get_datetime_from_timestamp(val):
    return datetime.fromtimestamp(val / 1000000)

In [10]:
targets_train = pd.read_csv(path + "targets_train.csv")
users_test = pd.read_csv(path + "users_test.csv")
users_train = pd.read_csv(path + "users_train.csv")
user_features_test = pd.read_csv(path + "user_features_test.csv")
user_features_train = pd.read_csv(path + "user_features_train.csv")

id_target_map = {i[0]:i[1] for i in targets_train.values}

In [11]:
df_train = pd.concat([user_features_train, users_train],axis=1)
df_test = pd.concat([user_features_test, users_test],axis=1)

In [12]:
df_train["target"] =  df_train["ID"].iloc[:,0].map(id_target_map)

stat_df = pd.DataFrame(columns=["iter","fold","train_score","test_score"])

In [13]:
df_train["date"] = df_train["first_open_timestamp"].apply(get_datetime_from_timestamp)
df_test["date"] = df_test["first_open_timestamp"].apply(get_datetime_from_timestamp)

df_train["local_date"] = df_train["local_first_open_timestamp"].apply(get_datetime_from_timestamp)
df_test["local_date"] = df_test["local_first_open_timestamp"].apply(get_datetime_from_timestamp)


In [14]:
exc_cols = ["first_open_date","ID", "date", "local_date", "fold", "target", "grp"]
cat_cols = [i for i in users_train.columns if users_train[i].dtype == "O" and i not in exc_cols]

ret_cols = [i for i in user_features_train.columns if "retention" in i.lower()]
level_advanced_cols = [i for i in user_features_train.columns if "leveladvanced" in i.lower()]
level_duration_cols = [i for i in user_features_train.columns if "level" in i.lower() and "duration" in i.lower()]
ad_revenue_cols = [i for i in user_features_train.columns if "adrevenue" in i.lower() ]
ap_revenue_cols = [i for i in user_features_train.columns if "aprevenue" in i.lower() ]

In [15]:
def get_cat_mean_feature(train_df, test_df):

    for col in cat_cols:
        mean_encoding_map = {i[0]:i[1] for i in  train_df.groupby(col)["target"].mean().reset_index().values}
        train_df[col + "_mean_encoding"] = train_df[col].map(mean_encoding_map)
        test_df[col + "_mean_encoding"] = test_df[col].map(mean_encoding_map)

    if "target" in list(train_df.columns):
        train_df = train_df.drop("target", axis=1)

        
    return train_df, test_df

In [16]:
fold_count = 5
# base_kf = KFold(n_splits=fold_count, shuffle=True, random_state=42)

def get_general_stats(df, cols, pref_name):
    sum_ = df[cols].sum(axis=1)
    std_ = df[cols].std(axis=1)
    
    cummax_df = df[cols].cummax(axis=1)
    cummax_df.columns = [i + "_cummax" for i in cummax_df]

    cummin_df = df[cols].cummin(axis=1)
    cummin_df.columns = [i + "_cummin" for i in cummin_df]

    cumsum_df = df[cols].cumsum(axis=1)
    cumsum_df.columns = [i + "_cumsum" for i in cumsum_df]
    
    fin_df = pd.concat([cummax_df,cummin_df, cumsum_df],axis=1)

    fin_df[pref_name + "_sum"] = sum_
    fin_df[pref_name + "_std"] = std_
    
    return fin_df

def get_retention_stats(df):
    mean_retentions = df[ret_cols].mean(axis=1)
    std_retentions = df[ret_cols].std(axis=1)
    
    cumsum_df = df[ret_cols].cumsum(axis=1)
    cumsum_df.columns = [i + "cumsum" for i in ret_cols]

    cumsum_df["mean_retentions"] = mean_retentions
    cumsum_df["std_retentions"] = std_retentions
    
    return cumsum_df

def get_time_related_feats(df):
    date_ = df["first_open_timestamp"].apply(get_datetime_from_timestamp)
    local_date = df["local_first_open_timestamp"].apply(get_datetime_from_timestamp)

    def get_datetime_feats(col):
        month = date_.dt.month
        year = date_.dt.year
        day = date_.dt.day
        hour = date_.dt.hour
        return month, year, day, hour
    

    day_diff = (date_ - local_date).dt.days
    second_diff = (date_ - local_date).dt.seconds
    hour_diff = second_diff //  3600
    df_ = pd.concat([day_diff, hour_diff],axis=1)
    df_.columns = ["day_diff", "hour_diff"]

    df_["month"], df_["year"], df_["day"], df_["hour"] = get_datetime_feats(date_)
    df_["local_month"], df_["local_myear"], df_["local_mday"], df_["local_mhour"] = get_datetime_feats(local_date)
    return df_

def get_score(y, y_pred):   
    mse = np.mean((y - y_pred)**2)
    return mse ** (1/2)


In [17]:
import numpy as np

def SumOfAverageFunction(vals):
    return (abs(np.divide(df2.values, df1.loc[vals.index].values)-1)*100).sum()

# vals = df1.column1.rolling(3)
# vals.apply(SumOfAverageFunction, raw=False)

In [18]:
a = df_train[ad_revenue_cols].head(100)

In [25]:
def get_div_vals(df,col_names):
    a = df[col_names]
    b = a.T.values
    liste = []
    for i in range(1,a.shape[1]):
        liste.append(b[i]/ b[i-1])

    dff = pd.DataFrame(liste).T
    dff.columns = [i + "_div_col" for i in col_names[:-1]]
    dff.index = df.index
    return dff

In [26]:
def get_rolling_stats(df, rol_cols, rol_step, rol_aggs = ["mean"]):
    df_list = []
    if "mean" in rol_aggs:
        df[rol_cols] = df[rol_cols]*1
        rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
        rol_df_mean = rol_df_mean.iloc[:,1:]
        rol_df_mean.columns = [i + f"_rolling_{rol_step}_mean" for i in rol_df_mean.columns]
        df_list.append(rol_df_mean)
    if "max" in rol_aggs:
        df[rol_cols] = df[rol_cols]*1
        rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
        rol_df_max = rol_df_max.iloc[:,1:]
        rol_df_max.columns = [i + f"_rolling_{rol_step}_max" for i in rol_df_max.columns]
        df_list.append(rol_df_max)

    fin = pd.concat(df_list,axis=1)
    return fin



def count_zeros(df, cols, cols_name):
    a = (df[cols] == 0).sum(axis=1)
    b = (df[cols] != 0).sum(axis=1)
    return a , b

In [27]:
col_couples = [("advanced", level_advanced_cols), ("duration",level_duration_cols), ("ad_revenue",ad_revenue_cols), ("ap_revenue",ap_revenue_cols)]

def make_main_df(train_or_test):
    generated_feats = []

    if train_or_test == "train":
        df = df_train.copy().iloc[:100]
    else:
        df = df_test.copy().iloc[:100]

    print("--------train_or_test ------------")
    generated_feats.append(get_retention_stats(df))
    generated_feats.append(get_time_related_feats(df))


    for col_name, col in col_couples:
        gen_stat_df = get_general_stats(df, col, col_name)
        generated_feats.append(gen_stat_df)
        for rol_step in [2,3,4,5]:
            generated_feats.append(get_rolling_stats(df, col, rol_step, rol_aggs = ["mean","max"]))
    print("---------  1   -----------------")
    for rol_step in [2,3,4,5]:
        generated_feats.append(get_rolling_stats(df, ret_cols, rol_step, rol_aggs = ["mean"]))
    print("---------  2   -----------------")

    # fin_df = pd.concat(generated_feats, axis=1)

    for col_name, col in col_couples:
        generated_feats.append(get_div_vals(df,col))
    print("---------  3   -----------------")

    first = generated_feats[0]
    for i in generated_feats[1:]:
        first = first.join(i)
    print("---------  4   -----------------")

    fin_df = pd.concat([df,first ],axis=1)
    print("---------  5   -----------------")

    for col_name, col in col_couples:
        fin_df[f"{col_name}_zero_count"], fin_df[f"{col_name}_nonzero_count"]  =  count_zeros(df, col, col_name)
        print("---------  5   -----------------")

    return fin_df
    # return fin_df, generated_feats


    

In [28]:
merged_train = make_main_df("train")
merged_test = make_main_df("test")


--------train_or_test ------------


  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol

---------  1   -----------------
---------  2   -----------------
---------  3   -----------------
---------  4   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------
--------train_or_test ------------


  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol

---------  1   -----------------
---------  2   -----------------
---------  3   -----------------
---------  4   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------
---------  5   -----------------


  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_max = df[rol_cols].rolling(rol_step,axis=1).max()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
  liste.append(b[i]/ b[i-1])
  liste.append(b[i]/ b[i-1])
  liste.append(b[i]/ b[i-1])
  liste.append(b[i]/ b[i-1])
  liste.append(b[i]/ b[i-1])


In [32]:
merged_train[cat_cols] = merged_train[cat_cols].fillna("nan")
merged_test[cat_cols] = merged_test[cat_cols].fillna("nan")

In [33]:
asd = """<ipython-input-35-6d45861fb45e>:5: FutureWarning: Support for axis=1 in DataFrame.rolling is deprecated and will be removed in a future version. Use obj.T.rolling(...) instead
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
<ipython-input-35-6d45861fb45e>:5: FutureWarning: Support for axis=1 in DataFrame.rolling is deprecated and will be removed in a future version. Use obj.T.rolling(...) instead
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
<ipython-input-35-6d45861fb45e>:5: FutureWarning: Support for axis=1 in DataFrame.rolling is deprecated and will be removed in a future version. Use obj.T.rolling(...) instead
  rol_df_mean = df[rol_cols].rolling(rol_step,axis=1).mean()
""".split("rol_df_mean")

In [34]:
len(asd)

4

In [35]:
fold_count = 5
def create_folds(dff, n_s=5, n_grp=None):
    df = dff.copy()
    df['fold'] = -1
    
    if n_grp is None:
        skf = KFold(n_splits=n_s, random_state=42,  shuffle=True)
        target = df.target
    else:
        skf = StratifiedKFold(n_splits=n_s, random_state=42, shuffle=True)
        df['grp'] = pd.cut(df.target, n_grp, labels=False)
        target = df.grp
    
    for fold_no, (t, v) in enumerate(skf.split(target, target)):
        df.loc[v, 'fold'] = fold_no
    return df

df_to_train = create_folds(merged_train, n_s=fold_count, n_grp=fold_count)




In [81]:
train_cols = [i for i in df_to_train.columns if i not in exc_cols]
len(train_cols)

848

In [37]:
for iter in [50]:
  for fold in range(fold_count):

    X_train = df_to_train.loc[df_to_train.fold != fold][train_cols] 
    X_test = df_to_train.loc[df_to_train.fold == fold][train_cols] 
    y_train = df_to_train.loc[df_to_train.fold != fold]["target"] 
    y_test = df_to_train.loc[df_to_train.fold == fold]["target"] 

    X_train, X_test = get_cat_mean_feature(df_to_train.loc[df_to_train.fold != fold][train_cols + ["target"]] ,
                                            df_to_train.loc[df_to_train.fold == fold][train_cols + ["target"]] )

    model=CatBoostRegressor(verbose=0, iterations=iter, cat_features=cat_cols)##cat_features=cat_cols

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_train = model.predict(X_train)
    test_score = get_score(y_test, y_pred)
    train_score = get_score(y_train, y_pred_train)
    print("fold :",fold, "test score :", test_score, "train_score :", train_score)
    row = {"iter":iter, "fold":fold, "train_score":train_score, "test_score":test_score}
    stat_df.loc[stat_df.shape[0]] = row
    break
    
    


fold : 0 test score : 0.100547060441087 train_score : 0.009111960686425401


In [82]:
train_cols

['first_prediction',
 'RetentionD0',
 'RetentionD1',
 'RetentionD2',
 'RetentionD3',
 'RetentionD4',
 'RetentionD5',
 'RetentionD6',
 'RetentionD7',
 'RetentionD8',
 'RetentionD9',
 'RetentionD10',
 'RetentionD11',
 'RetentionD12',
 'RetentionD13',
 'RetentionD14',
 'RetentionD15',
 'LevelAdvancedCountD0',
 'LevelAdvancedCountD1',
 'LevelAdvancedCountD2',
 'LevelAdvancedCountD3',
 'LevelAdvancedCountD4',
 'LevelAdvancedCountD5',
 'LevelAdvancedCountD6',
 'LevelAdvancedCountD7',
 'LevelAdvancedCountD8',
 'LevelAdvancedCountD9',
 'LevelAdvancedCountD10',
 'LevelAdvancedCountD11',
 'LevelAdvancedCountD12',
 'LevelAdvancedCountD13',
 'LevelAdvancedCountD14',
 'LevelAdvancedCountD15',
 'Level_1_Duration',
 'Level_2_Duration',
 'Level_3_Duration',
 'Level_4_Duration',
 'Level_5_Duration',
 'Level_6_Duration',
 'Level_7_Duration',
 'Level_8_Duration',
 'Level_9_Duration',
 'Level_10_Duration',
 'AdRevenueD0',
 'AdRevenueD1',
 'AdRevenueD2',
 'AdRevenueD3',
 'AdRevenueD4',
 'AdRevenueD5',
 'Ad

## submission

In [92]:
sub_df = pd.DataFrame(merged_test[["ID"]].iloc[:,0])


In [89]:
sad = {"selected_cols":train_cols, "train_df":merged_train, "test_df":merged_test, "cat_features": cat_cols, "iterations":5, "learning_rate":0.05,
       "use_mean_encoding":True,"task_type":"GPU"}


def get_submissions(selected_cols, train_df, test_df, use_mean_encoding,cat_features, iterations, learning_rate, task_type):

    X_train = train_df[selected_cols] 
    X_test = test_df[selected_cols] 
    y_train = train_df["target"].values

    submission_model=CatBoostRegressor(cat_features=cat_features, iterations=iterations, learning_rate = learning_rate, task_type=task_type)

    if use_mean_encoding:
        X_train, X_test = get_cat_mean_feature(train_df[selected_cols  + ["target"]] ,
            test_df[selected_cols])
    
    submission_model.fit(X_train, y_train)
    y_pred_sub = submission_model.predict(X_test)
    y_pred_train_sub = submission_model.predict(X_train)

    train_score = get_score(y_train, y_pred_train_sub)
    

    df = sub_df.copy()
    df["TARGET"] = y_pred_sub
    df.to_csv(f"{iterations}_{learning_rate}_{use_mean_encoding}_fugo.csv")

    return y_pred_sub, y_pred_train_sub, train_score

for iterations in [500, 50,100,1000]:
    for learning_rate in [0.1, 0.5, 0.05]:
        for use_mean_encoding in [True, False]:
            sad = {"selected_cols":train_cols, "train_df":merged_train, "test_df":merged_test, "cat_features": cat_cols, "iterations":iterations, "learning_rate":learning_rate,
                    "use_mean_encoding":use_mean_encoding,"task_type":"GPU"}
            y_pred_sub, y_pred_train_sub, train_score = get_submissions(**sad)

            break


In [168]:
merged_train[cat_cols].iloc[:2000]

Unnamed: 0,country,platform,device_category,device_brand,device_model,ad_network
0,Mexico,Android,mobile,Xiaomi,Redmi A2,unityads_int
1,Peru,Android,mobile,Samsung,Galaxy A13,applovin_int
2,Brazil,Android,mobile,Xiaomi,Redmi 12,applovin_int
3,Dominican Republic,iOS,mobile,Apple,iPhone 11 Pro Max,
4,Ecuador,Android,mobile,Motorola,Moto E22,applovin_int
...,...,...,...,...,...,...
95,Sweden,iOS,mobile,Apple,iPhone 13 Pro Max,
96,United States,iOS,mobile,Apple,iPhone 11,applovin_int
97,Romania,Android,mobile,Samsung,Galaxy S22,
98,United States,Android,mobile,Samsung,Galaxy S22 Ultra,applovin_int


In [170]:
merged_train[cat_cols].iloc[:2000].device_model.unique()

array(['Redmi A2', 'Galaxy A13', 'Redmi 12', 'iPhone 11 Pro Max',
       'Moto E22', 'Redmi Note 8', 'Galaxy S20 FE', 'iPhone 11', 'X9b',
       'iPhone SE (2nd generation)', 'Galaxy S23', 'iPhone 14 Pro Max',
       'Moto E13', 'Redmi Note 10S', 'Redmi 12 5G', 'Moto E32',
       'Galaxy Z Flip 5', 'iPhone 13 Pro', 'Moto G84 5G', 'Galaxy Tab S8',
       'iPhone 13', 'Honor 9X(China)', 'iPhone 13 Pro Max', 'Y73',
       'iPad (9th gen)', 'iPhone 14', 'BISON Pro', 'Moto e20', 'A77s',
       'Galaxy S22 Ultra', 'moto g53 5G', 'iPhone', 'iPhone XS Max',
       'C35', 'X8A', 'moto g(60)s', 'Galaxy S24+', 'Axon 50 Lite',
       'Galaxy A04', 'iPhone 8', 'iPad Air (4th gen)', 'iPhone 12 mini',
       'Galaxy A03 Core', 'Redmi Note 12 Pro 4G', 'Galaxy A14 5G',
       'Galaxy A32 5G', 'moto g23', 'X7a', 'Redmi Note 12 4G',
       'Galaxy A71 5G', 'iPhone XR', 'Galaxy A14', 'Galaxy S24 Ultra',
       'Moto G32', 'Redmi Note 5 Pro', 'iPhone 12', 'iPad Pro (11-inch)',
       'Edge 30 Neo', 'Redmi 

In [117]:
path

'C:/Users/HP/Desktop/fugo/'

In [119]:
def ff(i):
    general_path = "C:/Users/HP/Desktop/fugo/ind/" + i
    aa = pd.read_csv(general_path)
    print("sadsad")
    aa.drop("Unnamed: 0",axis=1).set_index("ID").to_csv("C:/Users/HP/Desktop/fugo/new/"+ f"{i}")

In [171]:
for i in os.listdir("C:/Users/HP/Desktop/fugo/ind/"):
    if "5000" in i:
        print(i)
        ff(i)

_new_trial_v2_5000_0.05_False_fugo.csv
sadsad
_new_trial_v2_5000_0.05_True_fugo.csv
sadsad
_new_trial_v2_5000_0.5_True_fugo.csv
sadsad


In [172]:
!kaggle competitions submit -c dacfugo -f C:/Users/HP/Desktop/fugo/new/_new_trial_v2_5000_0.5_True_fugo.csv -m "sub2"


Successfully submitted to DAC - Fugo Case



  0%|          | 0.00/15.5M [00:00<?, ?B/s]
  1%|          | 112k/15.5M [00:00<00:15, 1.02MB/s]
  4%|▍         | 624k/15.5M [00:00<00:05, 3.04MB/s]
  6%|▌         | 928k/15.5M [00:00<00:05, 3.00MB/s]
 11%|█         | 1.66M/15.5M [00:00<00:03, 4.49MB/s]
 13%|█▎        | 2.08M/15.5M [00:00<00:05, 2.43MB/s]
 15%|█▌        | 2.41M/15.5M [00:01<00:07, 1.79MB/s]
 19%|█▊        | 2.91M/15.5M [00:01<00:05, 2.34MB/s]
 23%|██▎       | 3.61M/15.5M [00:01<00:03, 3.25MB/s]
 27%|██▋       | 4.12M/15.5M [00:01<00:03, 3.65MB/s]
 29%|██▉       | 4.58M/15.5M [00:01<00:03, 3.54MB/s]
 32%|███▏      | 4.98M/15.5M [00:01<00:03, 3.10MB/s]
 34%|███▍      | 5.34M/15.5M [00:01<00:03, 2.76MB/s]
 39%|███▉      | 6.09M/15.5M [00:02<00:02, 3.78MB/s]
 42%|████▏     | 6.53M/15.5M [00:02<00:02, 3.64MB/s]
 45%|████▍     | 6.94M/15.5M [00:02<00:02, 3.63MB/s]
 47%|████▋     | 7.33M/15.5M [00:02<00:02, 3.41MB/s]
 50%|████▉     | 7.69M/15.5M [00:02<00:02, 3.43MB/s]
 52%|█████▏    | 8.05M/15.5M [00:02<00:02, 3.48MB/s]
 55%

In [138]:
i

'100_0.05_True_fugo.csv'

In [137]:
for i in os.listdir("C:/Users/HP/Desktop/fugo/new/"):
    !kaggle competitions submit -c dacfugo -f C:/Users/HP/Desktop/fugo/new/i -m "subbb"



Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\HP\AppData\Local\Programs\Python\Python311\Scripts\kaggle.exe\__main__.py", line 7, in <module>
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\kaggle\cli.py", line 54, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\kaggle\api\kaggle_api_extended.py", line 806, in competition_submit_cli
    submit_result = self.competition_submit(file_name, message,
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\kaggle\api\kaggle_api_extended.py", line 757, in competition_submit
    content_length=os.path.getsize(file_name),
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<frozen genericpath>", line 50, in gets

In [45]:
def get_submission_df(selected_cols):
    df = sub_df.copy()
    y_pred_sub, _, _ = get_submissions(selected_cols, merged_train, merged_test)

    df["TARGET"] = y_pred_sub
    # df.to_csv("")
    return df

In [None]:
get_submission_df( "advanced")

In [58]:
train_cols

[['LevelAdvancedCountD0',
  'LevelAdvancedCountD1',
  'LevelAdvancedCountD2',
  'LevelAdvancedCountD3',
  'LevelAdvancedCountD4',
  'LevelAdvancedCountD5',
  'LevelAdvancedCountD6',
  'LevelAdvancedCountD7',
  'LevelAdvancedCountD8',
  'LevelAdvancedCountD9',
  'LevelAdvancedCountD10',
  'LevelAdvancedCountD11',
  'LevelAdvancedCountD12',
  'LevelAdvancedCountD13',
  'LevelAdvancedCountD14',
  'LevelAdvancedCountD15']]

In [57]:
"LevelAdvancedCountD1" in merged_test.columns

True

In [None]:
!kaggle competitions submit -c h-and-m-personalized-fashion-recommendations  -f ../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv -m "Submission via API"

In [None]:
import os
os.environ["KAGGLE_USERNAME"]="ademsara"
os.environ["KAGGLE_KEY"]="yourapikey"

In [132]:
for 

!kaggle competitions submit -c dacfugo -f C:/Users/HP/Desktop/fugo/new/500_0.1_True_fugo.csv -m "sub1"

Successfully submitted to DAC - Fugo Case



  0%|          | 0.00/19.4M [00:00<?, ?B/s]
  1%|          | 112k/19.4M [00:00<00:20, 1.01MB/s]
  3%|▎         | 496k/19.4M [00:00<00:07, 2.63MB/s]
  6%|▌         | 1.08M/19.4M [00:00<00:04, 4.09MB/s]
  9%|▊         | 1.67M/19.4M [00:00<00:03, 4.86MB/s]
 11%|█         | 2.14M/19.4M [00:00<00:06, 2.90MB/s]
 13%|█▎        | 2.50M/19.4M [00:00<00:07, 2.27MB/s]
 14%|█▍        | 2.80M/19.4M [00:01<00:07, 2.38MB/s]
 16%|█▋        | 3.16M/19.4M [00:01<00:06, 2.68MB/s]
 20%|█▉        | 3.81M/19.4M [00:01<00:04, 3.62MB/s]
 22%|██▏       | 4.34M/19.4M [00:01<00:03, 4.04MB/s]
 25%|██▌       | 4.86M/19.4M [00:01<00:03, 4.38MB/s]
 28%|██▊       | 5.36M/19.4M [00:01<00:03, 4.48MB/s]
 31%|███       | 6.02M/19.4M [00:01<00:02, 4.88MB/s]
 34%|███▍      | 6.56M/19.4M [00:01<00:02, 5.09MB/s]
 37%|███▋      | 7.08M/19.4M [00:01<00:02, 5.17MB/s]
 40%|███▉      | 7.67M/19.4M [00:02<00:02, 5.29MB/s]
 43%|████▎     | 8.38M/19.4M [00:02<00:02, 5.58MB/s]
 46%|████▋     | 8.98M/19.4M [00:02<00:01, 5.77MB/s]
 49