<a href="https://colab.research.google.com/github/Pratham-Rajeev-Agrawal/Artificial_Idiots/blob/main/Practise_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from transformers import pipeline
import folium
from geopy.geocoders import Nominatim
import time


In [45]:
# Shared geocoder used by get_lat_lon() to turn place names into coordinates.
geolocator = Nominatim(user_agent="tweet_location_mapper", timeout=10)
# Shared map that generate_map() adds markers to.
# folium spells the zoom limits `min_zoom` / `max_zoom`; the earlier
# `zoom_min` / `zoom_max` keywords are not folium parameters, so the limits
# were never applied.
my_map = folium.Map(location=[20.5937, 78.9629], zoom_start=2, min_zoom=2, max_zoom=6)

In [46]:
import nltk
# Fetch the stop-word corpus that stopwords.words('english') reads in stemming().
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [47]:
# Main labelled dataset. Later cells drop its id/keyword/location columns and
# rename text/target to Tweet/Label.
tweets_1 = pd.read_csv('/content/train.csv')

In [48]:
# Extra tweets, parsed as CSV despite the .txt extension.
# NOTE(review): presumably already has Tweet and Label columns so it lines up
# with tweets_1 in the concat below — confirm against the file.
tweets_2 = pd.read_csv('/content/tweets_nitya.txt')

In [49]:
# Extra tweets, parsed as CSV despite the .txt extension.
# NOTE(review): presumably already has Tweet and Label columns so it lines up
# with tweets_1 in the concat below — confirm against the file.
tweets_3 = pd.read_csv('/content/tweets_pratham.txt')

In [50]:
# Discard the columns that are not used anywhere downstream.
tweets_1 = tweets_1.drop(['id', 'keyword', 'location'], axis = 'columns')

In [51]:
# Switch to the Tweet/Label column names that the rest of the notebook indexes by.
tweets_1 = tweets_1.rename({"text": "Tweet", "target": "Label"}, axis = "columns")

In [52]:
# Stack the three sources into a single frame with a fresh 0..n-1 index.
tweets = pd.concat((tweets_1, tweets_2, tweets_3), ignore_index = True)

In [53]:
# Single stemmer instance shared by every stemming() call.
port_stem = PorterStemmer()

In [54]:
# Build the stop-word lookup once. stopwords.words() re-reads the corpus and
# returns a list on every call, so calling it per token (as before) meant a
# fresh load plus a linear scan for every single word.
ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))

def stemming(content):
  """Normalise one tweet into a space-joined string of stemmed tokens.

  Steps: replace every non-ASCII-letter character with a space, lower-case,
  split on whitespace, drop English stop words, Porter-stem what remains.

  Args:
    content: The raw tweet text (a str).

  Returns:
    The cleaned tokens joined by single spaces; '' if nothing survives.
  """
  letters_only = re.sub('[^a-zA-Z]', ' ', content)
  tokens = letters_only.lower().split()
  stemmed_tokens = [port_stem.stem(word) for word in tokens if word not in ENGLISH_STOPWORDS]
  return ' '.join(stemmed_tokens)

In [55]:
# Replace the raw text with its cleaned, stemmed form. This overwrites the
# column in place, so re-running the cell feeds already-stemmed text back in.
tweets['Tweet'] = tweets['Tweet'].apply(stemming)

In [56]:
# Features are the cleaned tweet strings; targets are their labels.
x, y = tweets['Tweet'].values, tweets['Label'].values

In [57]:
# Split the cleaned text BEFORE vectorising, so the TF-IDF vocabulary and IDF
# weights are learned from the training portion only. Fitting the vectoriser on
# the full corpus (as before) leaks test-set statistics into the features.
# `x` is left untouched, which also makes this cell safe to re-run.
x_train_text, x_test_text, y_train, y_test = train_test_split(x, y, stratify = y, test_size = 0.2, random_state = 2)

vector = TfidfVectorizer()
x_train = vector.fit_transform(x_train_text)
# The held-out text is only transformed, never fitted on.
x_test = vector.transform(x_test_text)


In [61]:
# Classifier constructed with no arguments, i.e. scikit-learn's defaults.
model = LogisticRegression()

In [62]:
# Train on the TF-IDF features and labels of the training split.
model.fit(x_train, y_train)

In [63]:
def get_lat_lon(location):
    """Look up the coordinates of a place name with the shared geocoder.

    Args:
        location: Free-text place name, e.g. a city.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the name
        is empty, has no match, or the lookup raised.
    """
    # Nothing to look up: skip the geocoder call for None / empty strings.
    if not location:
        return None, None
    try:
        location_info = geolocator.geocode(location)
        if location_info:
            return location_info.latitude, location_info.longitude
        return None, None
    except Exception as e:
        # Best effort: one failed lookup must not abort mapping the other tweets.
        print(f"Error with geocoding: {e}")
        return None, None

In [64]:
def generate_map(input_locations, labels):
    """Add one coloured marker per geocodable location to the shared map.

    Args:
        input_locations: Iterable of place names, each passed to get_lat_lon().
        labels: Iterable of folium icon colour names, paired positionally with
            ``input_locations``; zip() ignores surplus items on either side.

    Returns:
        The module-level ``my_map``. It is mutated in place, so markers added
        by earlier calls are still present.
    """
    for loc, label in zip(input_locations, labels):
        lat, lon = get_lat_lon(loc)
        # Compare against None explicitly: 0.0 is a valid coordinate and must
        # not be mistaken for a failed lookup.
        if lat is None or lon is None:
            continue
        folium.Marker(
            [lat, lon],
            popup=loc,
            icon=folium.Icon(color=label)
        ).add_to(my_map)
    return my_map

In [65]:
# Marker colour for each label the sentiment model emits. Per the model card,
# LABEL_0 / LABEL_1 / LABEL_2 are negative / neutral / positive.
SENTIMENT_COLORS = {"LABEL_0": "red", "LABEL_1": "blue", "LABEL_2": "green"}

# Sentiment pipeline, created on first use and then reused, so the model is not
# reloaded on every handle_input_prediction() call.
_sentiment_model = None


def _get_sentiment_model():
    """Return the shared sentiment pipeline, building it on first use."""
    global _sentiment_model
    if _sentiment_model is None:
        _sentiment_model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
    return _sentiment_model


def handle_input_prediction(input_data):
    """Classify tweets and plot those predicted as class 1 on the shared map.

    Each text is cleaned with stemming(), vectorised with the fitted ``vector``
    and classified by ``model``. For texts predicted as 1, the sentiment
    pipeline is run on the raw text and the location is queued for a marker
    whose colour comes from SENTIMENT_COLORS ("gray" for unknown labels).

    Args:
        input_data: Either a sequence of ``(text, location)`` tuples, or a
            single ``(text, location)`` pair. An empty input adds no markers.

    Returns:
        The module-level ``my_map`` after generate_map() has added the markers.
    """
    # Normalise both accepted shapes into one list of (text, location) pairs.
    if not input_data:
        entries = []
    elif isinstance(input_data[0], tuple):
        entries = input_data
    else:
        entries = [input_data]

    locations = []
    colors = []
    for text, location in entries:
        processed_data = stemming(text)
        input_tfidf = vector.transform([processed_data])
        prediction = model.predict(input_tfidf)
        print(f"Prediction for {text}: {prediction}")

        # predict() returns a one-element array here; compare its scalar.
        if prediction[0] != 1:
            continue

        result = _get_sentiment_model()(text)
        print(result)

        locations.append(location)
        colors.append(SENTIMENT_COLORS.get(result[0]['label'], "gray"))

    generate_map(locations, colors)

    return my_map

In [66]:
# One (text, location) pair, wrapped in a list so it takes the list-of-tuples path.
input_data_single = ("earthquake in mumbai", "mumbai")
handle_input_prediction([input_data_single])

# Several pairs in one call. Markers go onto the same shared map as the call above.
input_data_multiple = [
    ("earthquake in mumbai", "mumbai"),
    ("storm in delhi", "delhi"),
    ("flood in chennai", "chennai")
]
handle_input_prediction(input_data_multiple)



Device set to use cpu


Prediction for earthquake in mumbai: [1]
[{'label': 'LABEL_1', 'score': 0.7991005182266235}]


Device set to use cpu


Prediction for earthquake in mumbai: [1]
[{'label': 'LABEL_1', 'score': 0.7991005182266235}]
Prediction for storm in delhi: [1]
[{'label': 'LABEL_1', 'score': 0.8904128670692444}]
Prediction for flood in chennai: [1]
[{'label': 'LABEL_1', 'score': 0.7894309163093567}]


In [67]:
# Disaster mentions phrased with different sentiment, so the markers get different colours.
input_data_2 = [("there was a flood", "vellore"), ("i enjoyed an earthquake", "paris"), ("i was sad during a typhoon", "dubai")]
handle_input_prediction(input_data_2)

Device set to use cpu


Prediction for there was a flood: [1]
[{'label': 'LABEL_1', 'score': 0.6258576512336731}]
Prediction for i enjoyed an earthquake: [1]
[{'label': 'LABEL_2', 'score': 0.8090850114822388}]
Prediction for i was sad during a typhoon: [1]
[{'label': 'LABEL_0', 'score': 0.8252146244049072}]


In [69]:
#saving the model
import pickle
filename = 'tweet_model.sav'
# Use a context manager so the file is flushed and closed even if dump() raises;
# the bare open() used before left the handle to the garbage collector.
# NOTE(review): only the classifier is saved. Predicting on new text also needs
# the fitted TfidfVectorizer (`vector`), which is not persisted here.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [70]:
# Read the classifier back, closing the file handle deterministically.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Mount Google Drive at /content/drive. The google.colab package is only
# importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')