In [47]:
#####
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# --- Load data -------------------------------------------------------------
train = pd.read_csv('/kaggle/input/djezzy/new_trainDjezzy.csv')
test = pd.read_csv('/kaggle/input/djezzy/TestDJEZZY.csv')

# Parse the date columns. They are not listed as model features below, so
# this only normalises their dtype.
train['SUBSCRIPTION_DATE'] = pd.to_datetime(train['SUBSCRIPTION_DATE'])
train['USAGE_DATE'] = pd.to_datetime(train['USAGE_DATE'])
test['SUBSCRIPTION_DATE'] = pd.to_datetime(test['SUBSCRIPTION_DATE'])
test['USAGE_DATE'] = pd.to_datetime(test['USAGE_DATE'])

# --- Features / target -----------------------------------------------------
X_train = train.drop(['Product_ID', 'SUBSCRIPTION_DATE', 'subscribers'], axis=1)
y_train = train['Product_ID']
X_test = test.drop(['SUBSCRIPTION_DATE', 'USAGE_DATE', 'subscribers'], axis=1)

categorical_features = ['usage_Type', 'Destination']
numerical_features = ['Amount_DZD', 'Amount_data_DZD', 'Volume_Data_KB', 'Volume_KB_SC_Nbr', 'Nb_USAGE']

# Only the columns named above reach the classifier: ColumnTransformer's
# default remainder='drop' discards every other column still in X_train.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# --- Fit and predict -------------------------------------------------------
pipeline.fit(X_train, y_train)
# Predict on the prepared feature frame (previously X_test was built but the
# raw `test` frame was passed instead).
predicted_product_ids = pipeline.predict(X_test)

# --- Build the submission --------------------------------------------------
submission = pd.DataFrame({
    'subscribers': test['subscribers'],
    'Product_ID': predicted_product_ids
})

if submission['subscribers'].duplicated().any():
    # Several rows share a subscriber: keep the most frequent prediction.
    # mode() returns its values sorted, so ties resolve to the smallest label;
    # .iloc[0] takes the first entry by position.
    print("Duplicates found. Aggregating predictions...")
    submission = (
        submission.groupby('subscribers')['Product_ID']
        .agg(lambda x: x.mode().iloc[0])
        .reset_index()
    )
else:
    print("No duplicates found.")

submission.to_csv('submission.csv', index=False)
# The old message claimed "No duplicates found." unconditionally, which
# contradicted the branch above whenever duplicates were aggregated.
print("Submission file created successfully!")

Duplicates found. Aggregating predictions...
Submission file created successfully! No duplicates found.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# --- Load data -------------------------------------------------------------
train = pd.read_csv('/kaggle/input/djezzy/new_trainDjezzy.csv')
test = pd.read_csv('/kaggle/input/djezzy/TestDJEZZY.csv')
train['SUBSCRIPTION_DATE'] = pd.to_datetime(train['SUBSCRIPTION_DATE'])
train['USAGE_DATE'] = pd.to_datetime(train['USAGE_DATE'])
# NOTE(review): 'Month' is not listed in numerical_features below, so the
# ColumnTransformer (default remainder='drop') discards it and the model never
# sees it. Add it to the feature lists (and to `test`) if it is meant to be used.
train['Month'] = train['SUBSCRIPTION_DATE'].dt.month

# --- Features / target and hold-out split ----------------------------------
X = train.drop(['Product_ID', 'SUBSCRIPTION_DATE', 'subscribers'], axis=1)
y = train['Product_ID']
# test_size=0.01 holds out 1% of the rows for validation (the old comment said
# 20%). NOTE(review): such a small hold-out gives a noisy accuracy estimate.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.01, random_state=42)

categorical_features = ['usage_Type', 'Destination']
numerical_features = ['Amount_DZD', 'Amount_data_DZD', 'Volume_Data_KB', 'Volume_KB_SC_Nbr', 'Nb_USAGE']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# --- Fit and validate ------------------------------------------------------
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# --- Predict on the test set -----------------------------------------------
# Fixed: the column list contained a stray ",," which is a SyntaxError and
# prevented the whole cell from running.
X_test = test.drop(['SUBSCRIPTION_DATE', 'subscribers'], axis=1)
predicted_product_ids = pipeline.predict(X_test)
submission = pd.DataFrame({
    'subscribers': test['subscribers'],
    'Product_ID': predicted_product_ids
})

# Collapse repeated subscribers to their most frequent predicted product so
# the file has one row per subscriber, as the other submission cells in this
# notebook do. mode() returns sorted values, so ties resolve to the smallest
# label.
if submission['subscribers'].duplicated().any():
    print("Duplicates found. Aggregating predictions...")
    submission = (
        submission.groupby('subscribers')['Product_ID']
        .agg(lambda x: x.mode().iloc[0])
        .reset_index()
    )

submission.to_csv('submission.csv', index=False)
print("Submission file created successfully!")

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
# `Input` is used when building the model below but was missing from this
# cell's imports, so the cell failed with NameError on a fresh kernel.
from tensorflow.keras.layers import Input

# --- Load and prepare the training data ------------------------------------
train = pd.read_csv('/kaggle/input/djezzy/new_trainDjezzy.csv')
train['SUBSCRIPTION_DATE'] = pd.to_datetime(train['SUBSCRIPTION_DATE'])
train['USAGE_DATE'] = pd.to_datetime(train['USAGE_DATE'])
train['Month'] = train['SUBSCRIPTION_DATE'].dt.month

# Drop the target, the raw dates, the subscriber id and the 'Ines' column.
# NOTE(review): 'Ines' is dropped from train only; confirm the test file has no
# such column, otherwise the two feature frames differ.
X = train.drop(['Product_ID', 'SUBSCRIPTION_DATE','USAGE_DATE', 'subscribers','Ines'], axis=1)
y = train['Product_ID']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# --- Preprocessing ---------------------------------------------------------
# Feature lists are derived from dtypes: object columns are one-hot encoded,
# int/float columns are standardised, anything else is passed through as-is.
categorical_features = X_train.select_dtypes(include=['object']).columns.tolist()
numerical_features = X_train.select_dtypes(include=['int', 'float']).columns.tolist()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],    remainder='passthrough')
# Fit the preprocessing on the training split only, then reuse it unchanged.
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_val_preprocessed = preprocessor.transform(X_val)

# Map Product_ID labels to consecutive integer class indices for the
# sparse categorical loss.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_val_encoded = encoder.transform(y_val)

# --- Model -----------------------------------------------------------------
model = Sequential([
    Input(shape=(X_train_preprocessed.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    # One output unit per distinct Product_ID seen in the training split.
    Dense(len(encoder.classes_), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train_preprocessed, y_train_encoded,
                    epochs=20,
                    validation_data=(X_val_preprocessed, y_val_encoded))

# --- Predict on the test set -----------------------------------------------
test = pd.read_csv('/kaggle/input/djezzy/TestDJEZZY.csv')
test['SUBSCRIPTION_DATE'] = pd.to_datetime(test['SUBSCRIPTION_DATE'])
test['USAGE_DATE'] = pd.to_datetime(test['USAGE_DATE'])
test['Month'] = test['SUBSCRIPTION_DATE'].dt.month
X_test = test.drop(['SUBSCRIPTION_DATE', 'USAGE_DATE', 'subscribers'], axis=1)
X_test_preprocessed = preprocessor.transform(X_test)
test_predictions = model.predict(X_test_preprocessed)
# argmax picks the most probable class index; inverse_transform maps it back
# to the original Product_ID label.
test_predicted_labels = encoder.inverse_transform(test_predictions.argmax(axis=1))

# --- Build the submission --------------------------------------------------
submission = pd.DataFrame({
    'subscribers': test['subscribers'],
    'Product_ID': test_predicted_labels
})
# Keep only the first predicted row for each subscriber.
submission = submission.drop_duplicates(subset=['subscribers'])
submission.to_csv('submission.csv', index=False)
print("Submission file created successfully!")

In [None]:
#####
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# Load the competition data (Kaggle input paths).
train = pd.read_csv('/kaggle/input/djezzy/new_trainDjezzy.csv')
test = pd.read_csv('/kaggle/input/djezzy/TestDJEZZY.csv')

# Parse the date columns in both frames.
train['SUBSCRIPTION_DATE'] = pd.to_datetime(train['SUBSCRIPTION_DATE'])
train['USAGE_DATE'] = pd.to_datetime(train['USAGE_DATE'])
test['SUBSCRIPTION_DATE'] = pd.to_datetime(test['SUBSCRIPTION_DATE'])
test['USAGE_DATE'] = pd.to_datetime(test['USAGE_DATE'])

# Remove the 'Ines' column. Without `columns=` (or axis=1) DataFrame.drop
# targets the row index, so the original call raised KeyError looking for a
# row labelled 'Ines'.
train = train.drop(columns=['Ines'])

In [None]:
# Display the current `train` DataFrame for a quick visual check.
train 

In [None]:

# Uses the `train` / `test` frames already loaded in the kernel.

# --- Features / target -----------------------------------------------------
X_train = train.drop(['Product_ID', 'SUBSCRIPTION_DATE', 'subscribers'], axis=1)
y_train = train['Product_ID']
X_test = test.drop(['SUBSCRIPTION_DATE', 'USAGE_DATE', 'subscribers'], axis=1)

categorical_features = ['usage_Type', 'Destination']
numerical_features = ['Amount_DZD', 'Amount_data_DZD', 'Volume_Data_KB', 'Volume_KB_SC_Nbr', 'Nb_USAGE']

# Only the columns named above reach the classifier: ColumnTransformer's
# default remainder='drop' discards every other column still in X_train.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# --- Fit and predict -------------------------------------------------------
pipeline.fit(X_train, y_train)
# Predict on the prepared feature frame (previously X_test was built but the
# raw `test` frame was passed instead).
predicted_product_ids = pipeline.predict(X_test)

# --- Build the submission --------------------------------------------------
submission = pd.DataFrame({
    'subscribers': test['subscribers'],
    'Product_ID': predicted_product_ids
})

if submission['subscribers'].duplicated().any():
    # Several rows share a subscriber: keep the most frequent prediction.
    # mode() returns its values sorted, so ties resolve to the smallest label;
    # .iloc[0] takes the first entry by position.
    print("Duplicates found. Aggregating predictions...")
    submission = (
        submission.groupby('subscribers')['Product_ID']
        .agg(lambda x: x.mode().iloc[0])
        .reset_index()
    )
else:
    print("No duplicates found.")

submission.to_csv('submission.csv', index=False)
# The old message claimed "No duplicates found." unconditionally, which
# contradicted the branch above whenever duplicates were aggregated.
print("Submission file created successfully!")