### Import the Required Libraries 

In [1]:
import pandas as pd 
import numpy as np 
from rake_nltk import Rake 
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity, linear_kernel
import gensim

### Read in the csvs

In [2]:
# Full article corpus used as the recommender's search space.
df = pd.read_csv('3.final_data/mega_data.csv')

# Per-disaster subsets, read in the same order as they are named on the left.
fire_df, blizz_df, flood_df, hurr_df, earth_df, torn_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '3.final_data/fire_comb.csv',
        '3.final_data/blizzard_comb.csv',
        '3.final_data/flood_comb.csv',
        '3.final_data/hurricane_comb.csv',
        '3.final_data/earthquake_comb.csv',
        '3.final_data/tornado_comb.csv',
    )
)

# Preview the first two rows of the full corpus.
df.head(2)

Unnamed: 0,author,content,description,photo_url,pub_date,source,title,url,combined_text,tokens,lems,stems,keywords
0,Lisa Rowan,"Its hurricane season, and weve got a weirdo st...","It’s hurricane season, and we’ve got a weirdo ...",https://i.kinja-img.com/gawker-media/image/upl...,2019-07-10,,Never Try To Drive Through a Flood,https://lifehacker.com/never-try-to-drive-thro...,"Its hurricane season, and weve got a weirdo st...","['Its', 'hurricane', 'season', 'and', 'weve', ...",Its hurricane season and weve got a weirdo sto...,it hurrican season and weve got a weirdo storm...,"['read', 'ever', 'following', 'demand', '…', '..."
1,"Yessenia Funes on Earther, shared by Virginia ...",Tropical Storm Barry still doesnt formally exi...,Tropical Storm Barry still doesn’t formally ex...,https://i.kinja-img.com/gawker-media/image/upl...,2019-07-11,,New Orleans Faces a Major Flood Threat [Updating],https://earther.gizmodo.com/new-orleans-faces-...,Tropical Storm Barry still doesnt formally exi...,"['Tropical', 'Storm', 'Barry', 'still', 'doesn...",Tropical Storm Barry still doesnt formally exi...,tropic storm barri still doesnt formal exist b...,"['tropical', 'storm', 'barry', 'still', 'doesn..."


### Recommender

In [3]:
def recomender(df, min_sim_score, title):
    """Return the titles of articles whose text resembles a given article.

    A TF-IDF matrix (English stop words removed) is fitted on
    ``df["combined_text"]`` and the row belonging to ``title`` is compared
    against every row with ``linear_kernel``. ``TfidfVectorizer`` L2-normalises
    rows by default, so this dot product equals the cosine similarity.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"title"`` and ``"combined_text"``.
    min_sim_score : float
        Minimum similarity (inclusive) an article needs in order to be returned.
    title : str
        Title of the query article. If several rows share this title, the
        first one is used as the query.

    Returns
    -------
    pandas.Series
        Titles of the matching articles ordered by decreasing similarity
        (ties keep their order in ``df``). The query article is scored against
        itself as well, so it is part of the result whenever its own score
        passes the threshold.

    Raises
    ------
    KeyError
        If no row of ``df`` has the given title.
    """
    # Locate the query row by *position*: a label lookup returns a Series for
    # duplicated titles and only matches matrix rows for a default RangeIndex.
    matching_rows = (df["title"] == title).to_numpy().nonzero()[0]
    if len(matching_rows) == 0:
        raise KeyError(title)
    idx = int(matching_rows[0])

    tfidf = TfidfVectorizer(stop_words='english')
    # Missing texts become empty documents; the vectorizer rejects NaN.
    tfidf_matrix = tfidf.fit_transform(df["combined_text"].fillna(""))
    # Only the query row is needed, so skip building the full n x n matrix.
    query_scores = linear_kernel(tfidf_matrix[idx], tfidf_matrix).ravel()

    # sorted() is stable, so equally scored articles keep their original order.
    sim_scores = sorted(enumerate(query_scores), key=lambda pair: pair[1], reverse=True)
    filtered_sim_scores = [pair for pair in sim_scores if pair[1] >= min_sim_score]

    print(f'Number of found articles for {title}: {len(filtered_sim_scores)}')

    article_indices = [position for position, _ in filtered_sim_scores]
    return df['title'].iloc[article_indices]

### Wildfire

In [4]:
# Seed article: title of the first row of the wildfire subset.
title = fire_df["title"].iloc[0]
# Titles of every article in `df` scoring at least 0.09 against the seed.
recom_list = recomender(df, 0.09, title).tolist()

Number of found articles for California Fires Live Updates: Strong Winds May Expand Blazes: 96


In [5]:
# Keep only the first three titles returned by the recommender.
recom_list = recom_list[:3]
# DataFrame.assign returns a new frame, so the flag column is added without
# mutating the shared `df` (a plain `temp_df = df` would only alias it).
temp_df = df.assign(final_list=df["title"].isin(recom_list).astype("int64"))
# Rows are matched by title, so duplicated titles in `df` can select more
# rows than there are entries in recom_list.
temp_df = temp_df.loc[temp_df["final_list"] == 1]
temp_df.shape

(3, 14)

In [6]:
# Write temp_df to the wildfire output file. to_csv is called with its
# defaults, so the DataFrame index is written as an unnamed first column.
temp_df.to_csv("4.search_home/fire_home.csv")

### Tornado

In [7]:
# Seed article: title of the sixth row (position 5) of the tornado subset.
title = torn_df["title"].iloc[5]
# Titles of every article in `df` scoring at least 0.09 against the seed.
recom_list = recomender(df, 0.09, title).tolist()

Number of found articles for Second tornado confirmed from weekend Saskatchewan storms - The Weather Network: 197


In [8]:
# Keep only the first three titles returned by the recommender.
recom_list = recom_list[:3]
# DataFrame.assign returns a new frame, so the flag column is added without
# mutating the shared `df` (a plain `temp_df = df` would only alias it).
temp_df = df.assign(final_list=df["title"].isin(recom_list).astype("int64"))
# Rows are matched by title, so duplicated titles in `df` can select more
# rows than there are entries in recom_list.
temp_df = temp_df.loc[temp_df["final_list"] == 1]
temp_df.shape

(3, 14)

In [9]:
# Write temp_df to the tornado output file. to_csv is called with its
# defaults, so the DataFrame index is written as an unnamed first column.
temp_df.to_csv("4.search_home/torn_home.csv")

### Flood

In [10]:
# Seed article: title of the first row of the flood subset.
title = flood_df["title"].iloc[0]
# Titles of every article in `df` scoring at least 0.09 against the seed.
recom_list = recomender(df, 0.09, title).tolist()

Number of found articles for Woman buys out shoe store to help flood victims: 41


In [11]:
# Keep only the first three titles returned by the recommender.
recom_list = recom_list[:3]
# DataFrame.assign returns a new frame, so the flag column is added without
# mutating the shared `df` (a plain `temp_df = df` would only alias it).
temp_df = df.assign(final_list=df["title"].isin(recom_list).astype("int64"))
# Rows are matched by title, so duplicated titles in `df` can select more
# rows than there are entries in recom_list.
temp_df = temp_df.loc[temp_df["final_list"] == 1]
temp_df.shape

(3, 14)

In [12]:
# Write temp_df to the flood output file. to_csv is called with its
# defaults, so the DataFrame index is written as an unnamed first column.
temp_df.to_csv("4.search_home/flood_home.csv")

### Earthquake

In [13]:
# Seed article: title of the second row (position 1) of the earthquake subset.
title = earth_df["title"].iloc[1]
# Titles of every article in `df` scoring at least 0.09 against the seed.
recom_list = recomender(df, 0.09, title).tolist()

Number of found articles for Powerful 7.5-magnitude earthquake hits Ecuador: 491


In [14]:
# Keep only the first three titles returned by the recommender.
recom_list = recom_list[:3]
# DataFrame.assign returns a new frame, so the flag column is added without
# mutating the shared `df` (a plain `temp_df = df` would only alias it).
temp_df = df.assign(final_list=df["title"].isin(recom_list).astype("int64"))
# Rows are matched by title, so duplicated titles in `df` can select more
# rows than there are entries in recom_list.
temp_df = temp_df.loc[temp_df["final_list"] == 1]
temp_df.shape

(4, 14)

In [15]:
# Write temp_df to the earthquake output file. to_csv is called with its
# defaults, so the DataFrame index is written as an unnamed first column.
temp_df.to_csv("4.search_home/earth_home.csv")

### Blizzard

There is no recent news about blizzards, so no recommendations are generated for this category.

### Hurricane

In [16]:
# Seed article: title of the first row of the hurricane subset.
title = hurr_df["title"].iloc[0]
# Titles of every article in `df` scoring at least 0.09 against the seed.
recom_list = recomender(df, 0.09, title).tolist()

Number of found articles for Typhoon, Cyclone or Hurricane? Different Names for the Same Storms: 60


In [17]:
# Keep only the first three titles returned by the recommender.
recom_list = recom_list[:3]
# DataFrame.assign returns a new frame, so the flag column is added without
# mutating the shared `df` (a plain `temp_df = df` would only alias it).
temp_df = df.assign(final_list=df["title"].isin(recom_list).astype("int64"))
# Rows are matched by title, so duplicated titles in `df` can select more
# rows than there are entries in recom_list.
temp_df = temp_df.loc[temp_df["final_list"] == 1]
temp_df.shape

(3, 14)

In [18]:
# Write temp_df to the hurricane output file. to_csv is called with its
# defaults, so the DataFrame index is written as an unnamed first column.
temp_df.to_csv("4.search_home/hurr_home.csv")