In [17]:
import pandas as pd
import psycopg2
from datetime import datetime

# Load the tweet CSV, drop incomplete rows, and insert the rest into the
# Corona_NLP_table table in PostgreSQL.
# NOTE: this cell is not idempotent -- every run appends the rows again.
import os  # local to this cell: used only to read the DB password from the environment

# Column order used both to build the row tuples and in the INSERT statement.
TABLE_COLUMNS = ['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet', 'Sentiment']

# Read CSV file into a DataFrame
df = pd.read_csv('Corona_NLP_test.csv')

# Convert TweetAt column to datetime format
df['TweetAt'] = pd.to_datetime(df['TweetAt'], format='%d-%m-%Y')

# Drop rows with any empty or null values
df = df.dropna()

# Create table
create_table_query = '''
CREATE TABLE IF NOT EXISTS Corona_NLP_table (
    UserName VARCHAR(255),
    ScreenName VARCHAR(255),
    Location VARCHAR(255),
    TweetAt DATE,
    OriginalTweet TEXT,
    Sentiment VARCHAR(50)
);
'''

# Insert data into PostgreSQL
insert_query = '''
INSERT INTO Corona_NLP_table (UserName, ScreenName, Location, TweetAt, OriginalTweet, Sentiment)
VALUES (%s, %s, %s, %s, %s, %s);
'''

# Convert DataFrame to list of tuples.
# Columns are selected by name so the tuple layout always matches the INSERT
# column list, regardless of the column order in the CSV file.
data = list(df[TABLE_COLUMNS].itertuples(index=False, name=None))

# Connect to PostgreSQL.
# The password is taken from the PGPASSWORD environment variable when set.
# NOTE(review): the literal fallback only keeps the notebook runnable as
# before; remove it (and rotate the password) once PGPASSWORD is configured.
connection = psycopg2.connect(
    host="localhost",
    database="Corona_NLP_db",
    user="postgres",
    password=os.environ.get("PGPASSWORD", "3234"),
)
try:
    # Create a cursor object
    cursor = connection.cursor()
    try:
        cursor.execute(create_table_query)
        cursor.executemany(insert_query, data)

        # Commit changes
        connection.commit()
    except Exception:
        # Undo the partial load so a failed run leaves no half-inserted data.
        connection.rollback()
        raise
    finally:
        cursor.close()
finally:
    # Always release the connection, even when the load fails.
    connection.close()




# import psycopg2

# # Connect to PostgreSQL
# connection = psycopg2.connect(
#     host="localhost",
#     database="postgres",  # Connect to the default 'postgres' database
#     user="postgres",
#     password="3234"
# )

# # Create a cursor object
# cursor = connection.cursor()

# # SQL command to drop the database
# sql_command = "DROP DATABASE IF EXISTS Corona_NLP_db;"

# # Execute the SQL command
# cursor.execute(sql_command)

# # Commit the changes
# connection.commit()

# # Close cursor and connection
# cursor.close()
# connection.close()

# print("Database 'Corona_NLP_db' has been removed.")


In [8]:
import pandas as pd
import plotly.express as px

# Exploratory charts for the tweet dataset: sentiment distribution, top
# locations, daily tweet volume, and a word cloud of the tweet text.

# Imported first so a missing `wordcloud` package fails before any figure is built.
from wordcloud import WordCloud

# Load the data
data = pd.read_csv('Corona_NLP_test.csv')

# Analysis 1: Sentiment Distribution
sentiment_counts = data['Sentiment'].value_counts().reset_index()
sentiment_counts.columns = ['Sentiment', 'Count']

# The column names already serve as the axis titles, so no `labels` mapping is needed.
fig1 = px.bar(sentiment_counts, x='Sentiment', y='Count')
fig1.update_layout(title='Sentiment Distribution')
fig1.show()

# Analysis 2: Location Analysis
location_counts = data['Location'].value_counts().reset_index().head(10)
location_counts.columns = ['Location', 'Count']

fig2 = px.bar(location_counts, x='Location', y='Count')
fig2.update_layout(title='Top 10 Locations')
fig2.show()

# Analysis 3: Tweet Volume Over Time
data['TweetAt'] = pd.to_datetime(data['TweetAt'], format='%d-%m-%Y')
tweet_volume = data.groupby('TweetAt').size().reset_index(name='Count')

# plotly express keys `labels` by column name (not by 'x'/'y'), so map the
# plotted columns to the axis titles that should be displayed.
fig3 = px.line(
    tweet_volume,
    x='TweetAt',
    y='Count',
    labels={'TweetAt': 'Date', 'Count': 'Tweet Volume'},
)
fig3.update_layout(title='Tweet Volume Over Time')
fig3.show()

# Analysis 4: Word Cloud of Original Tweets
# Cast to str so a stray non-string value cannot break the join.
text = ' '.join(data['OriginalTweet'].dropna().astype(str))
wordcloud = WordCloud(width=800, height=400).generate(text)

fig4 = px.imshow(wordcloud)
fig4.update_layout(title='Word Cloud of Original Tweets')
fig4.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

