In [3]:
! pip install -q datasets
! pip install evaluate



In [4]:
import pandas as pd
# Load train.csv into a DataFrame; every later cell reads from `df`.
df = pd.read_csv(filepath_or_buffer="/content/train.csv")

In [5]:
# Quick sanity check of the loaded frame: first rows, column index, and
# (n_rows, n_cols) -- each printed on its own line.
print(df.head(), df.columns, df.shape, sep="\n")

   id keyword location                                               text  \
0   1     NaN      NaN  Our Deeds are the Reason of this #earthquake M...   
1   4     NaN      NaN             Forest fire near La Ronge Sask. Canada   
2   5     NaN      NaN  All residents asked to 'shelter in place' are ...   
3   6     NaN      NaN  13,000 people receive #wildfires evacuation or...   
4   7     NaN      NaN  Just got sent this photo from Ruby #Alaska as ...   

   target  
0       1  
1       1  
2       1  
3       1  
4       1  
Index(['id', 'keyword', 'location', 'text', 'target'], dtype='object')
(7613, 5)


In [6]:
# Model input is the "text" column; the label to predict is the "target" column.
X, y = df["text"], df["target"]

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. stratify=y splits in a stratified
# fashion on the labels, and the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Two-step text classifier: TF-IDF features feeding a logistic regression.
model = Pipeline(
    steps=[
        (
            "tfidf",
            # Unigrams and bigrams, English stop words removed,
            # at most 10,000 features kept.
            TfidfVectorizer(
                ngram_range=(1, 2),
                max_features=10000,
                stop_words="english",
            ),
        ),
        (
            "clf",
            # Iteration cap for the solver set to 1000.
            LogisticRegression(max_iter=1000),
        ),
    ]
)

# Fit both steps on the training split only; the validation split stays unseen.
model.fit(X_train, y_train)

In [9]:
from sklearn.metrics import accuracy_score, classification_report

# Predict on the held-out validation split, then report overall accuracy
# followed by the per-class precision / recall / F1 table.
y_pred = model.predict(X_val)

print("Accuracy:", accuracy_score(y_true=y_val, y_pred=y_pred))
print(classification_report(y_true=y_val, y_pred=y_pred))


Accuracy: 0.8154957321076822
              precision    recall  f1-score   support

           0       0.80      0.90      0.85       869
           1       0.84      0.70      0.77       654

    accuracy                           0.82      1523
   macro avg       0.82      0.80      0.81      1523
weighted avg       0.82      0.82      0.81      1523



In [10]:
# Load test.csv and show its first rows and its column index.
test_df = pd.read_csv(filepath_or_buffer="/content/test.csv")
print(test_df.head(), test_df.columns, sep="\n")

   id keyword location                                               text
0   0     NaN      NaN                 Just happened a terrible car crash
1   2     NaN      NaN  Heard about #earthquake is different cities, s...
2   3     NaN      NaN  there is a forest fire at spot pond, geese are...
3   9     NaN      NaN           Apocalypse lighting. #Spokane #wildfires
4  11     NaN      NaN      Typhoon Soudelor kills 28 in China and Taiwan
Index(['id', 'keyword', 'location', 'text'], dtype='object')


In [11]:
# Run the fitted pipeline on the test file's "text" column and peek at the
# first ten predicted labels.
test_predictions = model.predict(X=test_df["text"])
print("Sample predictions:", test_predictions[:10])

Sample predictions: [1 1 1 1 1 1 0 0 0 0]
