<a href="https://colab.research.google.com/github/Gulshan89765/LandmarkDetection/blob/main/Another_copy_of_Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn import metrics

# Toy corpus: eight short sentences per topic, kept in one list per class so
# the label vector can be derived from the list lengths.
sports_texts = [
    "The team won the football match yesterday.",
    "The player scored a goal in the final game.",
    "The coach announced the new team lineup.",
    "The tennis championship was exciting.",
    "The cricketer hit a century.",
    "The stadium was full during the match.",
    "He trained hard for the marathon.",
    "They won the basketball tournament.",
]
politics_texts = [
    "The government passed a new healthcare policy.",
    "Elections will be held next month.",
    "The senator addressed the nation in a speech.",
    "Parliament debates the new education bill.",
    "The president gave a national address.",
    "The prime minister held a press conference.",
    "The political party announced their manifesto.",
    "The new law was signed by the president.",
]

# Sports sentences come first, followed by the politics sentences.
texts = sports_texts + politics_texts

# Label encoding: 0 = sports, 1 = politics (same order as `texts`).
labels = [0] * len(sports_texts) + [1] * len(politics_texts)

# Hold out 25% of the sentences for evaluation. Passing the labels to
# `stratify` asks for a class-balanced split, and the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.25,
)

# Both models share the same shape: TF-IDF features feeding a classifier.

# Support vector machine (author's note: a linear kernel works well for text).
svm_model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", SVC(kernel="linear")),
    ]
)

# Random forest with 100 trees and a fixed seed.
rf_model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", RandomForestClassifier(random_state=42, n_estimators=100)),
    ]
)

# --- SVM: fit on the training split, then score the held-out split ---
svm_preds = svm_model.fit(X_train, y_train).predict(X_test)
svm_accuracy = metrics.accuracy_score(y_test, svm_preds)
svm_report = metrics.classification_report(y_test, svm_preds)
print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", svm_report)

# --- Random Forest: same procedure on the same split ---
rf_preds = rf_model.fit(X_train, y_train).predict(X_test)
rf_accuracy = metrics.accuracy_score(y_test, rf_preds)
rf_report = metrics.classification_report(y_test, rf_preds)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", rf_report)

# Two unseen sentences (one per topic) used as a quick sanity check.
test_samples = [
    "The government introduced a new law.",
    "The striker scored two goals in the first half."
]

svm_test_preds = svm_model.predict(test_samples)
rf_test_preds = rf_model.predict(test_samples)


def _topic_name(label):
    """Return the display name for a predicted label (1 = politics, else sports)."""
    if label == 1:
        return 'Politics'
    return 'Sports'


# Print each model's predictions under its own heading, SVM first.
for heading, predicted in (
    ("\nSVM Predictions:", svm_test_preds),
    ("\nRandom Forest Predictions:", rf_test_preds),
):
    print(heading)
    for text, label in zip(test_samples, predicted):
        print(f"Text: '{text}' => Predicted: {_topic_name(label)}")


SVM Accuracy: 0.75
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.50      0.67         2

    accuracy                           0.75         4
   macro avg       0.83      0.75      0.73         4
weighted avg       0.83      0.75      0.73         4

Random Forest Accuracy: 0.75
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.50      0.67         2

    accuracy                           0.75         4
   macro avg       0.83      0.75      0.73         4
weighted avg       0.83      0.75      0.73         4


SVM Predictions:
Text: 'The government introduced a new law.' => Predicted: Politics
Text: 'The striker scored two goals in the first half.' => Predicted: Sports

Random Forest Predictions:
Text: 'The government introduced a new la

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn import metrics

# Extended dataset (8 per class)
texts = [
    # Sports
    "The team won the football match yesterday.",
    "The player scored a goal in the final game.",
    "The coach announced the new team lineup.",
    "The tennis championship was exciting.",
    "The cricketer hit a century.",
    "The stadium was full during the match.",
    "He trained hard for the marathon.",
    "They won the basketball tournament.",

    # Politics
    "The government passed a new healthcare policy.",
    "Elections will be held next month.",
    "The senator addressed the nation in a speech.",
    "Parliament debates the new education bill.",
    "The president gave a national address.",
    "The prime minister held a press conference.",
    "The political party announced their manifesto.",
    "The new law was signed by the president."
]

# Labels: 0 = sports, 1 = politics
labels = [0] *8 + [1]*8

# Hold out 25% of the sentences for evaluation; `stratify` is given the labels
# to request a class-balanced split, and the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.25,
)

# TF-IDF features feeding a logistic-regression classifier (liblinear solver).
model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", LogisticRegression(solver="liblinear")),
    ]
)

# Fit on the training split.
model.fit(X_train, y_train)

# Score the held-out split and report accuracy plus per-class metrics.
y_pred = model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
report = metrics.classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

# Two unseen sentences (one per topic) used as a quick sanity check.
test_samples = [
    "The striker scored two goals in the first half.",
    "The government introduced a new law."
]

predictions = model.predict(test_samples)

# Label 1 is reported as politics; anything else is reported as sports.
for text, label in zip(test_samples, predictions):
    category = 'Politics' if label == 1 else 'Sports'
    print(f"Text: '{text}' => Predicted: {category}")


Accuracy: 0.75
Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.50      0.67         2

    accuracy                           0.75         4
   macro avg       0.83      0.75      0.73         4
weighted avg       0.83      0.75      0.73         4

Text: 'The striker scored two goals in the first half.' => Predicted: Sports
Text: 'The government introduced a new law.' => Predicted: Politics
