In [None]:
from google.colab import drive

# Attach Google Drive; every data, feature and result path used in this
# notebook lives under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

ValueError: mount failed

In [None]:
# Vulgar
# Train an XGBoost classifier on pre-extracted image features to predict the
# 'Vulgar' label, then add the test-set predictions to the shared results CSV.

import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ========== CONFIG ==========
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'  # ⬅️ Pre-extracted test features
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_path       = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# ============================

# Load pre-extracted image features (dicts keyed by the ID without extension).
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)

with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# Load training labels
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)

# Keep only rows whose ID (text before the first '.') has a feature entry
train_ids = set(train_feature_dict.keys())
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_ids)].copy()

# Prepare training data: one feature entry and one label per retained row
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered['Vulgar']):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# Encode class labels as integers for XGBoost
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Balance the classes using ADASYN oversampling
adasyn = ADASYN(random_state=42)
X_bal, y_bal = adasyn.fit_resample(X, y_encoded)

# Train XGBoost Classifier.
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=10,
    subsample=0.9,
    colsample_bytree=0.9,
    scale_pos_weight=1,
    eval_metric='logloss',
    random_state=36
)

xgb_classifier.fit(X_bal, y_bal)

# Load Test Metadata
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)

# Match test IDs with the test feature dictionary; IDs without features are skipped
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)

# Predict and map the integer classes back to the original label values
y_test_pred = xgb_classifier.predict(X_test)
y_test_label = label_encoder.inverse_transform(y_test_pred)

# Prepare final DataFrame.
# The results file is shared by all labels, so merge into it instead of
# overwriting it (overwriting would discard the other labels' predictions).
new_label_df = pd.DataFrame({'Ids': matched_ids, 'Vulgar': y_test_label})
if os.path.exists(output_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_path, dtype={'Ids': str})
    # Drop a stale column from an earlier run; otherwise merge would emit
    # 'Vulgar_x' / 'Vulgar_y' instead of replacing it.
    result_df = result_df.drop(columns=['Vulgar'], errors='ignore')
    result_df = result_df.merge(new_label_df, on='Ids', how='outer')
else:
    result_df = new_label_df

# Save to Google Drive (create the results folder on first use)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path, index=False)
print(f"\nPredictions saved to: {output_path}")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


KeyboardInterrupt: 

In [None]:
#Abuse
# Abuse detection with XGBoost (test features are loaded from their own pickle).
# Trains on pre-extracted image features and adds the test-set 'Abuse'
# predictions to the shared results CSV.

import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ========== CONFIG ==========
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_path       = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# ============================

# Load pre-extracted image features (dicts keyed by the ID without extension).
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)

with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# Load training labels
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)

# Keep only rows whose ID (text before the first '.') has a feature entry
train_ids = set(train_feature_dict.keys())
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_ids)].copy()

# Prepare training data: one feature entry and one label per retained row
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered['Abuse']):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# Encode class labels as integers for XGBoost
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Balance the classes using ADASYN oversampling
adasyn = ADASYN(random_state=42)
X_bal, y_bal = adasyn.fit_resample(X, y_encoded)

# Train XGBoost Classifier.
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=10,
    subsample=0.9,
    colsample_bytree=0.9,
    scale_pos_weight=1,
    eval_metric='logloss',
    random_state=36
)

xgb_classifier.fit(X_bal, y_bal)

# Load Test Metadata
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)

# Match test IDs with the test feature dictionary; IDs without features are skipped
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)

# Predict and map the integer classes back to the original label values
y_test_pred = xgb_classifier.predict(X_test)
y_test_label = label_encoder.inverse_transform(y_test_pred)

# Prepare final DataFrame.
# The results file is shared by all labels, so merge into it instead of
# overwriting it (overwriting would discard the other labels' predictions).
new_label_df = pd.DataFrame({'Ids': matched_ids, 'Abuse': y_test_label})
if os.path.exists(output_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_path, dtype={'Ids': str})
    # Drop a stale column from an earlier run; otherwise merge would emit
    # 'Abuse_x' / 'Abuse_y' instead of replacing it.
    result_df = result_df.drop(columns=['Abuse'], errors='ignore')
    result_df = result_df.merge(new_label_df, on='Ids', how='outer')
else:
    result_df = new_label_df

# Save to Google Drive (create the results folder on first use)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path, index=False)
print(f"\n✅ Predictions saved to: {output_path}")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



✅ Predictions saved to: /content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv


In [None]:
#Sarcasm
# Train an XGBoost classifier on pre-extracted image features for the label
# named in LABEL_TO_PREDICT and merge its test-set predictions into the shared
# results CSV (one column per label).
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ======= CONFIG ========
LABEL_TO_PREDICT = 'Sarcasm'  # 🔁 Change this label for each run
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_csv_path   = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'


# --- Load features (dicts keyed by the ID without extension) ---
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)
with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# --- Load labels; keep rows whose ID (text before the first '.') has features ---
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_feature_dict)].copy()

# --- Prepare training data for specific label ---
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered[LABEL_TO_PREDICT]):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# --- Encode labels as integers and balance the classes with ADASYN ---
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
X_bal, y_bal = ADASYN(random_state=42).fit_resample(X, y_encoded)

# --- Train classifier ---
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=10,
    subsample=0.9, colsample_bytree=0.9, scale_pos_weight=1,
    eval_metric='logloss', random_state=36
)
xgb_classifier.fit(X_bal, y_bal)

# --- Prepare test data; IDs without a feature entry are skipped ---
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)
y_test_pred = xgb_classifier.predict(X_test)
# Map integer classes back to the original label values
y_test_label = label_encoder.inverse_transform(y_test_pred)

# --- Load or create result DataFrame ---
if os.path.exists(output_csv_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_csv_path, dtype={'Ids': str})
    # Drop this label's column from an earlier run; otherwise the merge below
    # would emit '<label>_x' / '<label>_y' instead of replacing it.
    result_df = result_df.drop(columns=[LABEL_TO_PREDICT], errors='ignore')
else:
    result_df = pd.DataFrame({'Ids': matched_ids})

# --- Merge new label predictions ---
new_label_df = pd.DataFrame({'Ids': matched_ids, LABEL_TO_PREDICT: y_test_label})
result_df = result_df.merge(new_label_df, on='Ids', how='outer')

# --- Save final results (create the results folder on first use) ---
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
result_df.to_csv(output_csv_path, index=False)
print(f"\n✅ '{LABEL_TO_PREDICT}' predictions added to: {output_csv_path}")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



✅ 'Sarcasm' predictions added to: /content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv


In [None]:
#Vulgar
# Train an XGBoost classifier on pre-extracted image features for the label
# named in LABEL_TO_PREDICT and merge its test-set predictions into the shared
# results CSV (one column per label).
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ======= CONFIG ========
LABEL_TO_PREDICT = 'Vulgar'  # 🔁 Change this label for each run
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_csv_path   = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# =======================

# --- Load features (dicts keyed by the ID without extension) ---
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)
with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# --- Load labels; keep rows whose ID (text before the first '.') has features ---
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_feature_dict)].copy()

# --- Prepare training data for specific label ---
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered[LABEL_TO_PREDICT]):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# --- Encode labels as integers and balance the classes with ADASYN ---
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
X_bal, y_bal = ADASYN(random_state=42).fit_resample(X, y_encoded)

# --- Train classifier ---
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=10,
    subsample=0.9, colsample_bytree=0.9, scale_pos_weight=1,
    eval_metric='logloss', random_state=36
)
xgb_classifier.fit(X_bal, y_bal)

# --- Prepare test data; IDs without a feature entry are skipped ---
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)
y_test_pred = xgb_classifier.predict(X_test)
# Map integer classes back to the original label values
y_test_label = label_encoder.inverse_transform(y_test_pred)

# --- Load or create result DataFrame ---
if os.path.exists(output_csv_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_csv_path, dtype={'Ids': str})
    # Drop this label's column from an earlier run; otherwise the merge below
    # would emit '<label>_x' / '<label>_y' instead of replacing it.
    result_df = result_df.drop(columns=[LABEL_TO_PREDICT], errors='ignore')
else:
    result_df = pd.DataFrame({'Ids': matched_ids})

# --- Merge new label predictions ---
new_label_df = pd.DataFrame({'Ids': matched_ids, LABEL_TO_PREDICT: y_test_label})
result_df = result_df.merge(new_label_df, on='Ids', how='outer')

# --- Save final results (create the results folder on first use) ---
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
result_df.to_csv(output_csv_path, index=False)
print(f"\n✅ '{LABEL_TO_PREDICT}' predictions added to: {output_csv_path}")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



✅ 'Vulgar' predictions added to: /content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv


In [None]:
#Sentiment
# Train an XGBoost classifier on pre-extracted image features for the label
# named in LABEL_TO_PREDICT and merge its test-set predictions into the shared
# results CSV (one column per label).
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ======= CONFIG ========
LABEL_TO_PREDICT = 'Sentiment'  # 🔁 Change this label for each run
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_csv_path   = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# =======================

# --- Load features (dicts keyed by the ID without extension) ---
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)
with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# --- Load labels; keep rows whose ID (text before the first '.') has features ---
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_feature_dict)].copy()

# --- Prepare training data for specific label ---
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered[LABEL_TO_PREDICT]):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# --- Encode labels as integers and balance the classes with ADASYN ---
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
X_bal, y_bal = ADASYN(random_state=42).fit_resample(X, y_encoded)

# --- Train classifier ---
# `use_label_encoder` and `scale_pos_weight` are intentionally not passed:
# XGBoost reports both as unused for this label, and scale_pos_weight=1 is
# the library default anyway.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=10,
    subsample=0.9, colsample_bytree=0.9,
    eval_metric='logloss', random_state=36
)
xgb_classifier.fit(X_bal, y_bal)

# --- Prepare test data; IDs without a feature entry are skipped ---
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)
y_test_pred = xgb_classifier.predict(X_test)
# Map integer classes back to the original label values
y_test_label = label_encoder.inverse_transform(y_test_pred)

# --- Load or create result DataFrame ---
if os.path.exists(output_csv_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_csv_path, dtype={'Ids': str})
    # Drop this label's column from an earlier run; otherwise the merge below
    # would emit '<label>_x' / '<label>_y' instead of replacing it.
    result_df = result_df.drop(columns=[LABEL_TO_PREDICT], errors='ignore')
else:
    result_df = pd.DataFrame({'Ids': matched_ids})

# --- Merge new label predictions ---
new_label_df = pd.DataFrame({'Ids': matched_ids, LABEL_TO_PREDICT: y_test_label})
result_df = result_df.merge(new_label_df, on='Ids', how='outer')

# --- Save final results (create the results folder on first use) ---
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
result_df.to_csv(output_csv_path, index=False)
print(f"\n✅ '{LABEL_TO_PREDICT}' predictions added to: {output_csv_path}")


Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



✅ 'Sentiment' predictions added to: /content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv


In [None]:
#Target
# Train an XGBoost classifier on pre-extracted image features for the label
# named in LABEL_TO_PREDICT and merge its test-set predictions into the shared
# results CSV (one column per label).
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ======= CONFIG ========
LABEL_TO_PREDICT = 'Target'  # 🔁 Change this label for each run
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_csv_path   = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# =======================

# --- Load features (dicts keyed by the ID without extension) ---
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)
with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# --- Load labels; keep rows whose ID (text before the first '.') has features ---
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_feature_dict)].copy()

# --- Prepare training data for specific label ---
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered[LABEL_TO_PREDICT]):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# --- Encode labels as integers and balance the classes with ADASYN ---
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
X_bal, y_bal = ADASYN(random_state=42).fit_resample(X, y_encoded)

# --- Train classifier ---
# `use_label_encoder` and `scale_pos_weight` are intentionally not passed:
# XGBoost reports both as unused for this label, and scale_pos_weight=1 is
# the library default anyway.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=10,
    subsample=0.9, colsample_bytree=0.9,
    eval_metric='logloss', random_state=36
)
xgb_classifier.fit(X_bal, y_bal)

# --- Prepare test data; IDs without a feature entry are skipped ---
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)
y_test_pred = xgb_classifier.predict(X_test)
# Map integer classes back to the original label values
y_test_label = label_encoder.inverse_transform(y_test_pred)

# --- Load or create result DataFrame ---
if os.path.exists(output_csv_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_csv_path, dtype={'Ids': str})
    # Drop this label's column from an earlier run; otherwise the merge below
    # would emit '<label>_x' / '<label>_y' instead of replacing it.
    result_df = result_df.drop(columns=[LABEL_TO_PREDICT], errors='ignore')
else:
    result_df = pd.DataFrame({'Ids': matched_ids})

# --- Merge new label predictions ---
new_label_df = pd.DataFrame({'Ids': matched_ids, LABEL_TO_PREDICT: y_test_label})
result_df = result_df.merge(new_label_df, on='Ids', how='outer')

# --- Save final results (create the results folder on first use) ---
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
result_df.to_csv(output_csv_path, index=False)
print(f"\n✅ '{LABEL_TO_PREDICT}' predictions added to: {output_csv_path}")


Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



✅ 'Target' predictions added to: /content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv


In [None]:
# Sentiment
# Train an XGBoost classifier on pre-extracted image features to predict the
# 'Sentiment' label, then add the test-set predictions to the shared results CSV.

import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ========== CONFIG ==========
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'  # ⬅️ Pre-extracted test features
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_path       = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# ============================

# Load pre-extracted image features (dicts keyed by the ID without extension).
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)

with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# Load training labels
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)

# Keep only rows whose ID (text before the first '.') has a feature entry
train_ids = set(train_feature_dict.keys())
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_ids)].copy()

# Prepare training data: one feature entry and one label per retained row
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered['Sentiment']):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# Encode class labels as integers for XGBoost
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Balance the classes using ADASYN oversampling
adasyn = ADASYN(random_state=42)
X_bal, y_bal = adasyn.fit_resample(X, y_encoded)

# Train XGBoost Classifier.
# `use_label_encoder` and `scale_pos_weight` are intentionally not passed:
# XGBoost reports both as unused for this label, and scale_pos_weight=1 is
# the library default anyway.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=10,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric='logloss',
    random_state=36
)

xgb_classifier.fit(X_bal, y_bal)

# Load Test Metadata
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)

# Match test IDs with the test feature dictionary; IDs without features are skipped
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)

# Predict and map the integer classes back to the original label values
y_test_pred = xgb_classifier.predict(X_test)
y_test_label = label_encoder.inverse_transform(y_test_pred)

# Prepare final DataFrame.
# The results file is shared by all labels, so merge into it instead of
# overwriting it (overwriting would discard the other labels' predictions).
new_label_df = pd.DataFrame({'Ids': matched_ids, 'Sentiment': y_test_label})
if os.path.exists(output_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_path, dtype={'Ids': str})
    # Drop a stale column from an earlier run; otherwise merge would emit
    # 'Sentiment_x' / 'Sentiment_y' instead of replacing it.
    result_df = result_df.drop(columns=['Sentiment'], errors='ignore')
    result_df = result_df.merge(new_label_df, on='Ids', how='outer')
else:
    result_df = new_label_df

# Save to Google Drive (create the results folder on first use)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path, index=False)
print(f"\n✅ Predictions saved to: {output_path}")


In [None]:
# Target
# Train an XGBoost classifier on pre-extracted image features to predict the
# 'Target' label, then add the test-set predictions to the shared results CSV.

import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import ADASYN
import xgboost as xgb

# ========== CONFIG ==========
train_feature_pkl = '/content/drive/MyDrive/Resnet_Method/Hindi_Image_Features_R101.pkl'
test_feature_pkl  = '/content/drive/MyDrive/Resnet_Method/Test_Hindi_Image_Features_R101.pkl'  # ⬅️ Pre-extracted test features
train_csv_path    = '/content/drive/MyDrive/Datasets/HASOC-2025/Train/Hindi_train_2025/Hindi_train_data.csv'
test_csv_path     = '/content/drive/MyDrive/Datasets/HASOC-2025/Test/Hindi_test_2025/Hindi_test_data_wo_label.csv'
output_path       = '/content/drive/My Drive/Hasoc_Result-2025/Hindi_Predictions_Result_using_Resnet.csv'
# ============================

# Load pre-extracted image features (dicts keyed by the ID without extension).
# NOTE(review): pickle.load can execute arbitrary code; only load feature
# files that you generated yourself.
with open(train_feature_pkl, 'rb') as f:
    train_feature_dict = pickle.load(f)

with open(test_feature_pkl, 'rb') as f:
    test_feature_dict = pickle.load(f)

# Load training labels
df = pd.read_csv(train_csv_path)
df['Ids'] = df['Ids'].astype(str)

# Keep only rows whose ID (text before the first '.') has a feature entry
train_ids = set(train_feature_dict.keys())
df_filtered = df[df['Ids'].apply(lambda x: x.split('.')[0] in train_ids)].copy()

# Prepare training data: one feature entry and one label per retained row
X, y = [], []
for image_id, label in zip(df_filtered['Ids'], df_filtered['Target']):
    X.append(train_feature_dict[image_id.split('.')[0]])
    y.append(label)

X = np.array(X)
y = np.array(y)

# Encode class labels as integers for XGBoost
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Balance the classes using ADASYN oversampling
adasyn = ADASYN(random_state=42)
X_bal, y_bal = adasyn.fit_resample(X, y_encoded)

# Train XGBoost Classifier.
# `use_label_encoder` and `scale_pos_weight` are intentionally not passed:
# XGBoost reports both as unused for this label, and scale_pos_weight=1 is
# the library default anyway.
xgb_classifier = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=10,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric='logloss',
    random_state=36
)

xgb_classifier.fit(X_bal, y_bal)

# Load Test Metadata
test_df = pd.read_csv(test_csv_path)
test_df['Ids'] = test_df['Ids'].astype(str)

# Match test IDs with the test feature dictionary; IDs without features are skipped
X_test = []
matched_ids = []

for image_id in test_df['Ids']:
    key = image_id.split('.')[0]
    if key in test_feature_dict:
        X_test.append(test_feature_dict[key])
        matched_ids.append(image_id)

X_test = np.array(X_test)

# Predict and map the integer classes back to the original label values
y_test_pred = xgb_classifier.predict(X_test)
y_test_label = label_encoder.inverse_transform(y_test_pred)

# Prepare final DataFrame.
# The results file is shared by all labels, so merge into it instead of
# overwriting it (overwriting would discard the other labels' predictions).
new_label_df = pd.DataFrame({'Ids': matched_ids, 'Target': y_test_label})
if os.path.exists(output_path):
    # Read Ids as strings so they join with the string IDs built above.
    result_df = pd.read_csv(output_path, dtype={'Ids': str})
    # Drop a stale column from an earlier run; otherwise merge would emit
    # 'Target_x' / 'Target_y' instead of replacing it.
    result_df = result_df.drop(columns=['Target'], errors='ignore')
    result_df = result_df.merge(new_label_df, on='Ids', how='outer')
else:
    result_df = new_label_df

# Save to Google Drive (create the results folder on first use)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path, index=False)
print(f"\n✅ Predictions saved to: {output_path}")
