In [5]:
import pandas as pd
import joblib
import os

# Paths
video_folder = r"C:\Users\Muralish\Desktop\Sapphires\Test\video"
data_file_path = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\combine\updated_final_combined_data.csv"
model_file_path = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\combine\rf_model.joblib"

# Positional layout of the combined dataset (0-based, end-exclusive slices).
# NOTE(review): assumed to match the column order the model was trained on -- confirm.
TARGET_COLS = slice(1, 92)     # positions 1-91: target (label) columns
FEATURE_COLS = slice(92, 122)  # positions 92-121: the 30 feature columns fed to the model

# Get ItemID from the .mp4 file.
# os.listdir() returns entries in arbitrary order, so sort to make the pick
# deterministic; the extension check is case-insensitive so "X.MP4" is found too.
mp4_files = sorted(f for f in os.listdir(video_folder) if f.lower().endswith(".mp4"))
if not mp4_files:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down (discarding all state) rather than just stopping this cell.
    raise FileNotFoundError("❌ No MP4 file found in the folder.")
if len(mp4_files) > 1:
    print(f"⚠️ {len(mp4_files)} MP4 files found; using the first one: {mp4_files[0]}")

item_id = os.path.splitext(mp4_files[0])[0]  # Extract ItemID from filename (without .mp4)

# Load the dataset
df = pd.read_csv(data_file_path)

# iloc slicing silently truncates when columns are missing, which would only
# surface later as a confusing model error -- fail early with a clear message.
if df.shape[1] < FEATURE_COLS.stop:
    raise ValueError(
        f"Dataset has {df.shape[1]} columns; at least {FEATURE_COLS.stop} are required."
    )

# Filter the dataset based on ItemID (compared as text, since item_id comes from a filename)
filtered_df = df[df["ItemID"].astype(str) == item_id]

if filtered_df.empty:
    raise ValueError(f"❌ No data found for ItemID: {item_id}")

# Save the extracted data
filtered_csv_path = os.path.join(video_folder, f"{item_id}_filtered.csv")
filtered_df.to_csv(filtered_csv_path, index=False)
print(f"✅ Extracted data saved to: {filtered_csv_path}")

# Load trained model.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# model files from a trusted source.
model = joblib.load(model_file_path)

# Select features by position
features = filtered_df.iloc[:, FEATURE_COLS]

# Make predictions (one output row per row of filtered_df)
predictions = model.predict(features)

# Retain original target column names
output_columns = df.columns[TARGET_COLS]

# Create a DataFrame for predictions; pandas raises ValueError here if the
# model's output width does not match the number of target columns.
predictions_df = pd.DataFrame(predictions, columns=output_columns)

# Reattach ItemID as the first column
predictions_df.insert(0, "ItemID", item_id)

# Save the classification results
classified_csv_path = os.path.join(video_folder, f"{item_id}_classified_results.csv")
predictions_df.to_csv(classified_csv_path, index=False)
print(f"✅ Classification complete! Results saved to: {classified_csv_path}")


✅ Extracted data saved to: C:\Users\Muralish\Desktop\Sapphires\Test\video\S5709_filtered.csv
✅ Classification complete! Results saved to: C:\Users\Muralish\Desktop\Sapphires\Test\video\S5709_classified_results.csv


In [8]:
import pandas as pd
import joblib
import os

# NOTE(review): everything up to the "Extract the best classified result"
# section repeats the previous cell's pipeline; consider factoring the shared
# steps into a function so the two cells cannot drift apart.

# Paths
video_folder = r"C:\Users\Muralish\Desktop\Sapphires\Test\video"
data_file_path = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\combine\updated_final_combined_data.csv"
model_file_path = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\combine\rf_model.joblib"

# Positional layout of the combined dataset (0-based, end-exclusive slices).
# NOTE(review): assumed to match the column order the model was trained on -- confirm.
TARGET_COLS = slice(1, 92)     # positions 1-91: target (label) columns
FEATURE_COLS = slice(92, 122)  # positions 92-121: the 30 feature columns fed to the model

# Get ItemID from the .mp4 file.
# os.listdir() returns entries in arbitrary order, so sort to make the pick
# deterministic; the extension check is case-insensitive so "X.MP4" is found too.
mp4_files = sorted(f for f in os.listdir(video_folder) if f.lower().endswith(".mp4"))
if not mp4_files:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down (discarding all state) rather than just stopping this cell.
    raise FileNotFoundError("❌ No MP4 file found in the folder.")
if len(mp4_files) > 1:
    print(f"⚠️ {len(mp4_files)} MP4 files found; using the first one: {mp4_files[0]}")

item_id = os.path.splitext(mp4_files[0])[0]  # Extract ItemID from filename (without .mp4)

# Load the dataset
df = pd.read_csv(data_file_path)

# iloc slicing silently truncates when columns are missing, which would only
# surface later as a confusing model error -- fail early with a clear message.
if df.shape[1] < FEATURE_COLS.stop:
    raise ValueError(
        f"Dataset has {df.shape[1]} columns; at least {FEATURE_COLS.stop} are required."
    )

# Filter the dataset based on ItemID (compared as text, since item_id comes from a filename)
filtered_df = df[df["ItemID"].astype(str) == item_id]

if filtered_df.empty:
    raise ValueError(f"❌ No data found for ItemID: {item_id}")

# Save the extracted data
filtered_csv_path = os.path.join(video_folder, f"{item_id}_filtered.csv")
filtered_df.to_csv(filtered_csv_path, index=False)
print(f"✅ Extracted data saved to: {filtered_csv_path}")

# Load trained model.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# model files from a trusted source.
model = joblib.load(model_file_path)

# Select features by position
features = filtered_df.iloc[:, FEATURE_COLS]

# Make predictions (one output row per row of filtered_df)
predictions = model.predict(features)

# Retain original target column names
output_columns = df.columns[TARGET_COLS]

# Create a DataFrame for predictions; pandas raises ValueError here if the
# model's output width does not match the number of target columns.
predictions_df = pd.DataFrame(predictions, columns=output_columns)

# Reattach ItemID as the first column
predictions_df.insert(0, "ItemID", item_id)

# Save the classification results
classified_csv_path = os.path.join(video_folder, f"{item_id}_classified_results.csv")
predictions_df.to_csv(classified_csv_path, index=False)
print(f"✅ Classification complete! Results saved to: {classified_csv_path}")

# ------- Extract the best classified result -------
# Exclude ItemID column from selection
columns_to_check = predictions_df.columns[1:]

# Boolean mask of cells that are exactly 1, computed on the label columns only
# so the ItemID column never takes part in the comparison.
label_hits = predictions_df[columns_to_check].eq(1)

# Find the row with the most `1` values (counting hits rather than summing raw
# values, so non-binary outputs cannot skew the choice); ties keep the first row.
best_row_index = label_hits.sum(axis=1).idxmax()
best_row = predictions_df.loc[[best_row_index]]

# Keep only the label columns whose value in the best row is `1`.
active_columns = [col for col in columns_to_check if label_hits.at[best_row_index, col]]

# .copy() gives an independent frame so insert() below does not act on a slice
# of best_row; ItemID is guaranteed absent here, so insert() cannot collide.
best_row_filtered = best_row[active_columns].copy()
best_row_filtered.insert(0, "ItemID", item_id)  # Ensure ItemID remains

# Save the filtered best row
best_csv_path = os.path.join(video_folder, f"{item_id}_best_classified_result.csv")
best_row_filtered.to_csv(best_csv_path, index=False)

print(f"✅ Best classified result (only 1s) saved to: {best_csv_path}")


✅ Extracted data saved to: C:\Users\Muralish\Desktop\Sapphires\Test\video\S5709_filtered.csv
✅ Classification complete! Results saved to: C:\Users\Muralish\Desktop\Sapphires\Test\video\S5709_classified_results.csv
✅ Best classified result (only 1s) saved to: C:\Users\Muralish\Desktop\Sapphires\Test\video\S5709_best_classified_result.csv
