# Import Data

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
# Load the raw tweet export into a DataFrame.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the "Columns (7) have mixed types"
# DtypeWarning this cell used to emit.
# NOTE(review): some rows displayed further down contain mis-decoded
# punctuation, which suggests the file may really be UTF-8 rather than
# ISO-8859-1 -- confirm against the data source before changing the encoding.
df = pd.read_csv(
    '/content/tweets_cryptopunk_2017_2022.csv',
    encoding='ISO-8859-1',
    low_memory=False,
)

# Keep only named columns: drop every column whose header starts with 'Unnamed:'
df = df.loc[:, ~df.columns.str.startswith('Unnamed:')]


Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.



In [None]:
# Preview the first five rows of the loaded frame
df.head()

Unnamed: 0,Date,Text
0,2017/6/23 12:16,Got me an @dennisrodman #cryptopunk might be w...
1,2017/6/24 0:03,@dennisrodman #cryptopunk number 7339 is up fo...
2,2017/6/24 21:44,Just sold my first #CryptoPunk - all appears t...
3,2017/6/24 22:53,Good job once again. #cryptocurrency #cryptopu...
4,2017/6/24 23:58,I liked a @YouTube video https://t.co/8EE4BReY...


# Text Processing

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re

# Fetch the NLTK resources used below: the English stop-word list and the
# Punkt tokenizer data that word_tokenize() depends on (no stemmer is needed).
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases (3.9 and later) load the tokenizer data from 'punkt_tab'
# instead of 'punkt'; requesting both keeps the cell working on old and new
# versions.
nltk.download('punkt_tab')

# Stop words kept as a set so the membership test in preprocess_text() is cheap
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    """Normalize one tweet into a space-separated string of lowercase words.

    Steps: lowercase, strip URLs, drop every character that is not an ASCII
    letter or whitespace, tokenize with NLTK, then discard stop words.

    Args:
        text: Raw cell value from the 'Text' column. Missing values (NaN/None)
            yield an empty string; any other non-string value is converted
            with str() first instead of raising AttributeError on .lower().

    Returns:
        str: The cleaned text, or "" when the input is missing or nothing
        survives the filtering.
    """
    # Missing tweets become an empty document
    if pd.isna(text):
        return ""

    # A mixed-type column can hand us numbers; coerce before using str methods
    text = str(text).lower()

    # Remove URLs
    text = re.sub(r'http\S+', '', text)

    # Remove non-alphabetic characters (digits, punctuation, symbols).
    # Apostrophes are removed too, so "could've" becomes "couldve".
    # NOTE(review): because this happens before the stop-word check, stop words
    # that contain an apostrophe presumably no longer match -- confirm whether
    # that is intended.
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize the text
    words = word_tokenize(text)

    # Keep purely alphabetic tokens that are not stop words
    words = [word for word in words if word not in stop_words and word.isalpha()]

    # Join the remaining words into a string
    return ' '.join(words)

# Clean every tweet and store the result next to the raw text
df['Preprocessed Text'] = df['Text'].map(preprocess_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Show the last five rows, now including the 'Preprocessed Text' column
df.tail()

Unnamed: 0,Date,Text,Preprocessed Text
122369,2022/10/31 22:27,~â¢@FamousFoxFed is launching support on apto...,famousfoxfed launching support aptos foxy tool...
122370,2022/10/31 22:38,Was going to get a hoodie punk. Which one? Wel...,going get hoodie punk one well going og crypto...
122371,2022/10/31 23:02,@Arthemort @SothebysInst Using AI for my Crypt...,arthemort sothebysinst using ai cryptopunk
122372,2022/10/31 23:39,@johnkarp cryptopunk. i could've gotten one ea...,johnkarp cryptopunk couldve gotten one easily ...
122373,2022/10/31 23:45,On Saturday weâre going to release our first...,saturday going release first collection pieces...


In [None]:
# Save the cleaned DataFrame to a new CSV file (row index is not written).
# NOTE(review): the output name says "2022" while the input file name says
# "2017_2022" -- confirm the name is intended.
df.to_csv('cleaned_tweets_cryptopunk_2022.csv', index=False)

# Sentiment Analysis (VADER)

In [None]:
! pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Drop rows that have no 'Date' value.
# Rebinding the result (instead of inplace=True) avoids hidden in-place mutation.
df = df.dropna(subset=['Date'])

In [None]:
# Show the last five rows after removing rows without a date
df.tail()

Unnamed: 0,Date,Text,Preprocessed Text
122369,2022/10/31 22:27,~â¢@FamousFoxFed is launching support on apto...,famousfoxfed launching support aptos foxy tool...
122370,2022/10/31 22:38,Was going to get a hoodie punk. Which one? Wel...,going get hoodie punk one well going og crypto...
122371,2022/10/31 23:02,@Arthemort @SothebysInst Using AI for my Crypt...,arthemort sothebysinst using ai cryptopunk
122372,2022/10/31 23:39,@johnkarp cryptopunk. i could've gotten one ea...,johnkarp cryptopunk couldve gotten one easily ...
122373,2022/10/31 23:45,On Saturday weâre going to release our first...,saturday going release first collection pieces...


In [None]:
# Work on an independent copy: a plain `DF = df` only creates a second name for
# the same object, so any in-place change made through DF would also change df.
DF = df.copy()
DF.head()

Unnamed: 0,Date,Text,Preprocessed Text
0,2017/6/23 12:16,Got me an @dennisrodman #cryptopunk might be w...,got dennisrodman cryptopunk might willing swap...
1,2017/6/24 0:03,@dennisrodman #cryptopunk number 7339 is up fo...,dennisrodman cryptopunk number sale eth best o...
2,2017/6/24 21:44,Just sold my first #CryptoPunk - all appears t...,sold first cryptopunk appears working larvalab...
3,2017/6/24 22:53,Good job once again. #cryptocurrency #cryptopu...,good job cryptocurrency cryptopunk ether ethereum
4,2017/6/24 23:58,I liked a @YouTube video https://t.co/8EE4BReY...,liked youtube video iota drops buy opportunity...


In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Build the date-indexed frame from df in one expression instead of mutating DF
# in place. The cell can now be re-run safely (the in-place version raised a
# KeyError on the second run because 'Date' had already become the index), and
# df itself is left untouched. Unparseable dates become NaT (errors='coerce').
DF = (
    df.assign(Date=pd.to_datetime(df['Date'], errors='coerce'))
      .set_index('Date')
)


def daily_sentiment_score(text):
    """Return the VADER compound score for a single piece of text.

    Args:
        text: The string to score.

    Returns:
        The 'compound' entry of analyzer.polarity_scores(text).
    """
    return analyzer.polarity_scores(text)['compound']


# NOTE(review): VADER is described by its authors as using punctuation,
# capitalisation and negation words as sentiment cues, and preprocess_text()
# lowercases the text and strips punctuation and stop words -- consider scoring
# the raw 'Text' column instead; confirm with the analysis owner.
# Score every tweet once, then average the scores per calendar day.
# Days without any tweets come out as NaN.
df_daily = DF['Preprocessed Text'].apply(daily_sentiment_score).resample('D').mean()


In [None]:
# Display the per-day mean compound score
df_daily

Date
2017-06-23    0.202300
2017-06-24    0.447925
2017-06-25    0.000000
2017-06-26         NaN
2017-06-27         NaN
                ...   
2022-10-27    0.100737
2022-10-28    0.124016
2022-10-29    0.104087
2022-10-30    0.120621
2022-10-31    0.141306
Name: Preprocessed Text, Length: 1957, dtype: float64

In [None]:
# Name the Series 'Sentiment Score', then move the date index into a regular column
df_daily_sentiment = df_daily.rename('Sentiment Score').reset_index()

In [None]:
# List the column labels of the daily sentiment frame
df_daily_sentiment.columns

Index(['Date', 'Sentiment Score'], dtype='object')

In [None]:
# Preview the first five days of the sentiment frame
df_daily_sentiment.head()

Unnamed: 0,Date,Sentiment Score
0,2017-06-23,0.2023
1,2017-06-24,0.447925
2,2017-06-25,0.0
3,2017-06-26,
4,2017-06-27,


In [None]:
# Replace missing daily scores (NaN) with 0.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place form raises a warning in recent
# pandas 2.x and no longer updates the frame once Copy-on-Write is active.
# NOTE(review): after this step a filled-in 0 cannot be told apart from a day
# whose real mean score is 0 -- confirm this is intended.
df_daily_sentiment['Sentiment Score'] = df_daily_sentiment['Sentiment Score'].fillna(0.0)

In [None]:
# Display the sentiment frame after the missing scores were filled
df_daily_sentiment

Unnamed: 0,Date,Sentiment Score
0,2017-06-23,0.202300
1,2017-06-24,0.447925
2,2017-06-25,0.000000
3,2017-06-26,0.000000
4,2017-06-27,0.000000
...,...,...
1952,2022-10-27,0.100737
1953,2022-10-28,0.124016
1954,2022-10-29,0.104087
1955,2022-10-30,0.120621


In [None]:
# Write the daily sentiment frame to CSV (row index is not written)
df_daily_sentiment.to_csv('sentiment_cryptopunk_2022.csv', index=False)

In [None]:
import plotly.graph_objs as go

# Figure with a single line trace: daily sentiment score over time
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_daily_sentiment['Date'],
            y=df_daily_sentiment['Sentiment Score'],
            mode='lines',
        )
    ]
)

# Title and axis labels
fig.update_layout(
    title='CryptoPunks Daily Sentiment Score',
    xaxis_title='Date',
    yaxis_title='Sentiment Score',
)

# Render the chart
fig.show()


In [None]:
# Number of days with a strictly positive / strictly negative score
# (days scoring exactly 0 are counted in neither)
positive_count = int((df_daily_sentiment['Sentiment Score'] > 0).sum())
negative_count = int((df_daily_sentiment['Sentiment Score'] < 0).sum())


In [None]:
import plotly.express as px

# Days with a strictly positive / strictly negative score (exact zeros are left out)
positive_count = df_daily_sentiment['Sentiment Score'].gt(0).sum()
negative_count = df_daily_sentiment['Sentiment Score'].lt(0).sum()

# Two-row summary table that feeds the chart
hist_df = pd.DataFrame(
    dict(
        Sentiment=['Positive', 'Negative'],
        Count=[positive_count, negative_count],
    )
)

# One bar per category, coloured by category. The layout and trace tweaks are
# chained because each update_* call returns the figure itself.
fig = (
    px.histogram(
        hist_df,
        x='Sentiment',
        y='Count',
        color='Sentiment',
        title='CryptoPunks Sentiment Distribution',
    )
    # y-axis label
    .update_layout(yaxis_title='Count (days)')
    # print the bar value above each bar
    .update_traces(texttemplate='%{y}', textposition='outside')
)

fig.show()


# Tweet Volume

In [None]:
# Bucket the date-indexed frame by calendar day and count the non-null 'Text'
# entries in each bucket
daily_tweet_count = DF.resample('D')['Text'].count()

# Print the daily tweet count
print(daily_tweet_count)

Date
2017-06-23      1
2017-06-24      4
2017-06-25      1
2017-06-26      0
2017-06-27      0
             ... 
2022-10-27    145
2022-10-28     97
2022-10-29    102
2022-10-30     78
2022-10-31    109
Name: Text, Length: 1957, dtype: int64


In [None]:
# Build the 'Date' / 'Tweet Volume' frame straight from DF instead of converting
# `daily_tweet_count` onto itself: that form rebound a Series name to a
# DataFrame, so running the cell a second time failed (a DataFrame has no
# to_frame()). This version gives the same result and can be re-run freely.
daily_tweet_count = (
    DF['Text']
    .resample('D')
    .count()
    .to_frame(name='Tweet Volume')
    .reset_index()
)

In [None]:
# Display the daily tweet volume frame
daily_tweet_count

Unnamed: 0,Date,Tweet Volume
0,2017-06-23,1
1,2017-06-24,4
2,2017-06-25,1
3,2017-06-26,0
4,2017-06-27,0
...,...,...
1952,2022-10-27,145
1953,2022-10-28,97
1954,2022-10-29,102
1955,2022-10-30,78


In [None]:
# Write the daily tweet volume frame to CSV (row index is not written)
daily_tweet_count.to_csv('tweet_volume_cryptopunk_2022.csv', index=False)

In [None]:
import plotly.graph_objs as go

# Start from an empty figure and attach one line trace: tweets per day
fig = go.Figure()
fig.add_scatter(
    x=daily_tweet_count['Date'],
    y=daily_tweet_count['Tweet Volume'],
    mode='lines',
)

# Title and axis labels
fig.update_layout(
    title='CryptoPunks Daily Tweet Volume',
    xaxis_title='Date',
    yaxis_title='Tweet Volume',
)

# Render the chart
fig.show()
