In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data
data_path = r'C:\Users\HP\Downloads\tam_training_data_hum_ai.csv'  # Replace with your file path
data = pd.read_csv(data_path)

# Preprocessing
X = data['DATA']  # Feature column
y = data['LABEL']  # Target column

# Convert text data to numerical features using TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # Limit to 5000 features for efficiency
X_tfidf = tfidf_vectorizer.fit_transform(X).toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42, stratify=y)

# Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Add predictions to the original dataframe
data['Predictions'] = clf.predict(tfidf_vectorizer.transform(data['DATA']).toarray())

# Save the predictions to a new CSV file
output_path = "Tam_RandomForest(training).csv"
data.to_csv(output_path, index=False)
print(f"Predictions saved to {output_path}")


Accuracy: 0.91
Classification Report:
              precision    recall  f1-score   support

          AI       0.88      0.94      0.91        81
       HUMAN       0.93      0.88      0.90        81

    accuracy                           0.91       162
   macro avg       0.91      0.91      0.91       162
weighted avg       0.91      0.91      0.91       162

Predictions saved to Tam_RandomForest(training).csv


In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

# Load training data for model training
train_data_path = r'C:\Users\HP\Downloads\tam_training_data_hum_ai.csv' # Replace with your training data file path
train_data = pd.read_csv(train_data_path)

# Preprocessing training data
X_train = train_data['DATA']  # Feature column
y_train = train_data['LABEL']  # Target column

# Convert text data to numerical features using TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # Limit to 5000 features for efficiency
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train).toarray()

# Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_tfidf, y_train)

# Load test data for predictions
test_data_path = r"C:\Users\HP\Downloads\tam_test_data_hum_ai.xlsx"  # Path to your test data file
test_data = pd.read_excel(test_data_path)

# Preprocessing test data
X_test = test_data['DATA']
X_test_tfidf = tfidf_vectorizer.transform(X_test).toarray()

# Make predictions on test data
test_data['Predictions'] = clf.predict(X_test_tfidf)

# Replace prediction labels with "AI" and "HUMAN"
test_data['Predictions'] = test_data['Predictions'].replace({"A": "AI", "HUMAN": "HUMAN"})

# Save the predictions to a new CSV file
output_path = "Tam_RandomForest.csv"
test_data.to_csv(output_path, index=False)
print(f"Predictions saved to {output_path}")


Predictions saved to Tam_RandomForest.csv
