In [2]:
import pandas as pd
import folium
from folium.plugins import HeatMap
import plotly.express as px



In [3]:
# Location of the JSON chunks. NOTE: this is an absolute, machine-specific
# path; adjust `dataset_dir` when running on another machine.
dataset_dir = "C:/Users/admin/Desktop/SMA-GIthub -Assignment/Dataset"
chunk_count = 10
file_path = [
    f"{dataset_dir}/dutch_tweets_chunk{chunk_no}.json"
    for chunk_no in range(chunk_count)
]

In [5]:
# Read every JSON chunk into its own frame, then stack them into a single
# frame with a fresh 0..n-1 index.
df_read = list(map(pd.read_json, file_path))
df = pd.concat(df_read, ignore_index=True)

# Quick sanity check: overall dimensions and the first ten rows.
print(df.shape)
print(df.head(10))

(271342, 23)
                                           full_text  \
0  @pflegearzt @Friedelkorn @LAguja44 Pardon, wol...   
1  RT @grantshapps: Aviation demand is reduced du...   
2  RT @DDStandaard: De droom van D66 wordt werkel...   
3  RT @DDStandaard: De droom van D66 wordt werkel...   
4  De droom van D66 wordt werkelijkheid: COVID-19...   
5  De droom van D66 wordt werkelijkheid: COVID-19...   
6  RT @eucopresident: Following consultations I w...   
7  RT @eucopresident: Following consultations I w...   
8      Read absorb and share https://t.co/IHPDY6qS4w   
9  RT @FrankvanWijck: Het aantal deskundigen over...   

                                    text_translation           created_at  \
0  @pflegearzt @Friedelkorn @ LAguja44 Pardon wol...  2020-03-09 12:26:29   
1  RT @grantshapps: Aviation demand is reduced du...  2020-03-09 12:26:34   
2  RT @DDStandaard: The D66 dream come true: COVI...  2020-03-09 12:26:37   
3  RT @DDStandaard: The D66 dream come true: COVI...  2020-03-

In [None]:
# Coordinate columns used by the map cells below.
required_col = ["latitude", "longitude"]
df_cols = df.loc[:, required_col]
# Show the dtype of each selected column.
print(df_cols.dtypes)

latitude     float64
longitude    float64
dtype: object


In [7]:
# Print the extreme coordinate values (max/min longitude, then max/min latitude).
# Series.max()/Series.min() skip NaN by default and run vectorised. The builtin
# max()/min() iterate element by element and return NaN whenever the first
# value happens to be NaN, so their result depends on row order.
print(df_cols['longitude'].max())
print(df_cols['longitude'].min())
print(df_cols['latitude'].max())
print(df_cols['latitude'].min())

179.0122737
-160.3554851
90.0
-79.4063075


In [8]:
# Number of rows in the coordinate frame.
print(df_cols.shape[0])

271342


In [10]:
#Cleaning dataset
# Valid geographic ranges in degrees. The previous bounds were the dataset's
# own observed min/max, so that filter could never reject a row.
LAT_BOUNDS = (-90.0, 90.0)
LON_BOUNDS = (-180.0, 180.0)

# Drop rows with a missing coordinate, then keep only rows whose coordinates
# fall inside the valid ranges (between() is inclusive on both ends).
df_cols = df_cols.dropna(subset=['latitude','longitude'])
df_cols = df_cols[
    df_cols["latitude"].between(*LAT_BOUNDS)
    & df_cols["longitude"].between(*LON_BOUNDS)
]

print(f"Length of total record: {len(df_cols)}")

Length of total record: 134445


In [11]:
# Centre of the map: mean latitude and mean longitude of the cleaned points.
center_lat = df_cols["latitude"].mean()
center_lon = df_cols["longitude"].mean()
map_center = [center_lat, center_lon]
print(map_center)

[np.float64(49.529843755182085), np.float64(4.194786876439347)]


In [12]:
# Density map of the cleaned coordinates, centred on their mean position.
density_center = {
    "lat": df_cols["latitude"].mean(),
    "lon": df_cols["longitude"].mean(),
}
figure = px.density_map(
    df_cols,
    lat="latitude",
    lon="longitude",
    center=density_center,
    radius=6,
    zoom=8,
    map_style="open-street-map",
)
figure.show()

In [25]:
# Export the plotly figure as an HTML file in the working directory.
plotly_html_name = "plotyly_Dutch.html"
figure.write_html(plotly_html_name)

In [26]:
# Base map positioned at the previously computed map_center.
heatmap_map = folium.Map(location=map_center, zoom_start=4)
# One [latitude, longitude] pair per row of the cleaned frame.
heat_data = df_cols[["latitude","longitude"]].to_numpy().tolist()
print("Generating heatmap.... ")
# Build the heat layer, then attach it to the base map.
heat_layer = HeatMap(heat_data, radius=10, blur=15, max_zoom=6)
heat_layer.add_to(heatmap_map)

Generating heatmap.... 


<folium.plugins.heat_map.HeatMap at 0x1b3f4cb1640>

In [27]:
# Write the folium map to an HTML file and report where it went.
output_file = "heatmap_Dutch.html"
heatmap_map.save(outfile=output_file)
print(f"Heatmap saved as :'{output_file}'")

Heatmap saved as :'heatmap_Dutch.html'


In [17]:
# List all columns, then preview the first ten values of the text and
# sentiment columns used in the analysis below.
print(df.columns)
for preview_col in ('full_text', 'text_translation', 'sentiment_pattern'):
    print(df[preview_col].head(10))

Index(['full_text', 'text_translation', 'created_at', 'screen_name',
       'description', 'desc_translation', 'weekofyear', 'weekday', 'day',
       'month', 'year', 'location', 'point_info', 'point', 'latitude',
       'longitude', 'altitude', 'province', 'hisco_standard', 'hisco_code',
       'industry', 'sentiment_pattern', 'subjective_pattern'],
      dtype='object')
0    @pflegearzt @Friedelkorn @LAguja44 Pardon, wol...
1    RT @grantshapps: Aviation demand is reduced du...
2    RT @DDStandaard: De droom van D66 wordt werkel...
3    RT @DDStandaard: De droom van D66 wordt werkel...
4    De droom van D66 wordt werkelijkheid: COVID-19...
5    De droom van D66 wordt werkelijkheid: COVID-19...
6    RT @eucopresident: Following consultations I w...
7    RT @eucopresident: Following consultations I w...
8        Read absorb and share https://t.co/IHPDY6qS4w
9    RT @FrankvanWijck: Het aantal deskundigen over...
Name: full_text, dtype: object
0    @pflegearzt @Friedelkorn @ LAguja44 Par

In [18]:
# Keep only the translated text and its sentiment score.
sentiment_cols = ['text_translation', 'sentiment_pattern']
sen_analysis = df[sentiment_cols]
sen_analysis.shape


(271342, 2)

In [19]:
def sentiment_analysis(score):
    """Translate a numeric sentiment score into a text label.

    Args:
        score: Numeric sentiment value.

    Returns:
        "Positive" when ``score`` is greater than zero, "Negative" when it is
        less than zero, and "Neutral" in every other case.
    """
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    # Neither comparison held (for example, score == 0).
    return "Neutral"

In [20]:
# Show the dtype of each column in the sentiment subset before labelling.
print(sen_analysis.dtypes)

text_translation      object
sentiment_pattern    float64
dtype: object


In [23]:
# Drop rows missing either the translation or the score, then attach the label
# column. .assign() builds the column on the new frame returned by dropna(),
# which avoids the SettingWithCopyWarning that `frame[col] = ...` can raise on
# a frame derived from a slice of `df`.
# The column name 'sentiment_lables' is kept as-is so the exported CSV schema
# does not change.
sen_analysis = (
    sen_analysis
    .dropna(subset=['text_translation', 'sentiment_pattern'])
    .assign(
        sentiment_lables=lambda frame: frame['sentiment_pattern'].apply(sentiment_analysis)
    )
)
print(sen_analysis.head(10))

                                    text_translation  sentiment_pattern  \
0  @pflegearzt @Friedelkorn @ LAguja44 Pardon wol...                0.0   
1  RT @grantshapps: Aviation demand is reduced du...                0.0   
2  RT @DDStandaard: The D66 dream come true: COVI...                0.0   
3  RT @DDStandaard: The D66 dream come true: COVI...                0.0   
4  The D66 dream becomes reality: COVID-19 super ...                0.0   
5  The D66 dream becomes reality: COVID-19 super ...                0.0   
6  RT @eucopresident: Following consultations I w...                0.0   
7  RT @eucopresident: Following consultations I w...                0.0   
8      Read absorb and share https://t.co/IHPDY6qS4w                0.0   
9  RT @FrankvanWijck: The number of experts on co...                0.4   

  sentiment_lables  
0          Neutral  
1          Neutral  
2          Neutral  
3          Neutral  
4          Neutral  
5          Neutral  
6          Neutral  
7     

In [24]:
# Export the labelled sentiment table to CSV without the index column.
csv_name = "Aanalysis_review.csv"
sen_analysis.to_csv(csv_name, index=False)