In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Load the training and test datasets

In [2]:
import pandas as pd
# Read the training CSV and the test CSV (in that order) from the Kaggle
# input directory into two DataFrames.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/ml-dataset/Acoustic_train.csv',
        '/kaggle/input/ml-dataset/Acoustic_test_no_class.csv',
    )
)

# Check the shapes (rows, columns) of the datasets

In [3]:
# Print each dataset's (rows, columns) shape on its own line.
print(
    *(
        f"{label} {frame.shape}"
        for label, frame in (
            ("Training Data Shape:", train_data),
            ("Test Data Shape:", test_data),
        )
    ),
    sep="\n",
)

Training Data Shape: (300, 52)
Test Data Shape: (100, 51)


# Separate features and target from the training dataset

In [4]:
# Target vector: the 'Class' column of the training frame.
y_train = train_data['Class']
# Feature matrix: every training column except 'Class'.
X_train = train_data.drop('Class', axis='columns')

# Standardize the feature columns

In [5]:
# Standardize the features: fit the scaler on the training features, then apply
# the same fitted transformation to the test features.
# NOTE(review): the scaler is fitted on the full training frame, i.e. before the
# train/validation split done later in the notebook, so validation rows also
# contribute to the scaling statistics. Presumably harmless for a tree-based
# model, but confirm before reusing this preprocessing with a scale-sensitive one.
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
# Select the test columns by the training feature names so the scaler always
# receives the same features in the same order it was fitted on, even if the
# test CSV carries extra columns or a different column order. A missing
# feature column fails loudly with a KeyError here.
X_test_scale = scaler.transform(test_data[X_train.columns])

# Split the training data into train and validation sets

In [6]:
# Hold out 20% of the scaled training rows for validation; the fixed seed makes
# the partition repeatable.
# NOTE(review): the split is not stratified on y_train — consider whether the
# class balance warrants passing stratify=y_train.
(
    X_train_split,
    X_val_split,
    y_train_split,
    y_val_split,
) = train_test_split(
    X_train_scale,
    y_train,
    random_state=42,
    test_size=0.2,
)

# Initialize and train the Random Forest model

In [7]:
# Random forest with 200 trees, tree depth capped at 10, and a fixed seed.
random_forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
)
# Fit on the training portion of the split only; the validation rows stay unseen.
random_forest.fit(X_train_split, y_train_split)

# Make predictions and calculate validation accuracy

In [8]:
# Predict on the held-out validation rows and compare against their true labels.
rf_val_predictions = random_forest.predict(X_val_split)
rf_validation_accuracy = accuracy_score(
    y_true=y_val_split,
    y_pred=rf_val_predictions,
)

# Print validation accuracy

In [9]:
# Report the validation accuracy as a percentage (value * 100) with two decimals.
print(f"Random Forest Validation Accuracy: {rf_validation_accuracy * 100:.2f}%")


Random Forest Validation Accuracy: 83.33%


# Predict the class labels for the test dataset

In [10]:
# Predict with the fitted forest on the scaled test features.
# NOTE(review): random_forest was fitted on X_train_split only (the 80% portion
# left after the validation split), not on the full training set.
test_predictions = random_forest.predict(X_test_scale)

# Add the predictions as a new column in a copy of the test dataset and save the result to final.csv

In [11]:
# Build a new frame holding the test data plus a trailing 'Predicted Class'
# column; test_data itself is left untouched.
test_data_predictions = test_data.assign(**{'Predicted Class': test_predictions})

# Write the combined frame to final.csv without the row index.
test_data_predictions.to_csv("final.csv", index=False)
