In [10]:
from keras.models import load_model
import numpy as np 
import pandas as pd

from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences

### Predicting sentiment of release of first month of chatGPT

In [11]:
df = pd.read_csv('datasets/preprocessed_chatgptfirst.csv')

In [None]:
df = df['tweet']

In [19]:
# check if there is any NaN value

df.isnull().values.any()
df.isnull().sum()

14

In [21]:
# drop the NaN values if any

df = df.dropna()

In [22]:
df.isnull().values.any()
df.isnull().sum()

0

In [12]:
# use the pretrained model 
model = load_model('model/cnnkeras.h5', compile=False)
model.compile(optimizer="adam",
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Print the summary
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 100)          11683700  
                                                                 
 conv1d (Conv1D)             (None, 99, 32)            6432      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 49, 32)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 48, 32)            2080      
                                                                 
 dropout (Dropout)           (None, 48, 32)            0         
                                                                 
 conv1d_2 (Conv1D)           (None, 47, 16)            1040      
                                                        

In [18]:
df.values

array(['chatgpt optimizing language models dialogue openai',
       'try talking chatgpt new optimized dialogue feedback help improve',
       'chatgpt optimizing language models dialogue machinelearning datascience artificialintelligencenntrending aiml article identified amp digested granola machinedriven rss bot ramsey elbasheer',
       ..., 'chatgpt disassembled dissemble',
       'predictions chatgpt specific trends past years nshould topic days',
       'chatgpt neat stuff'], dtype=object)

In [23]:
tokenizer = Tokenizer(num_words=50000)
tokenizer.fit_on_texts(df.values)

tweets = tokenizer.texts_to_sequences(df)

vocab_size = len(tokenizer.word_index) + 1
maxlen = 100

tweets = pad_sequences(tweets, padding='post', maxlen=maxlen)

word_index = tokenizer.word_index

In [24]:
# we have that :
# 2    positiv
# 1    neutral
# 0    negativ
pred = np.argmax(model.predict(tweets), axis=-1)



In [25]:
pred

array([0, 2, 2, ..., 0, 1, 1], dtype=int64)

In [26]:
len(pred)

219280

In [27]:
len(tweets)

219280

In [28]:
tweets

array([[    1,  1211,    31, ...,     0,     0,     0],
       [   72,   114,     1, ...,     0,     0,     0],
       [    1,  1211,    31, ...,     0,     0,     0],
       ...,
       [    1, 33731, 35798, ...,     0,     0,     0],
       [  944,     1,   376, ...,     0,     0,     0],
       [    1,  2018,   164, ...,     0,     0,     0]])

In [29]:
df

0         chatgpt optimizing language models dialogue op...
1         try talking chatgpt new optimized dialogue fee...
2         chatgpt optimizing language models dialogue ma...
3         thrilled share chatgpt new model optimized dia...
4         minutes ago openai released new chatgpt nnand ...
                                ...                        
219289           software projects trying replicate chatgpt
219290    asked chatgpt write nye joke seos delivered nn...
219291                       chatgpt disassembled dissemble
219292    predictions chatgpt specific trends past years...
219293                                   chatgpt neat stuff
Name: tweet, Length: 219280, dtype: object

In [30]:
len(df)

219280

In [41]:
data = df.values

In [42]:
dataframe = {'tweets': data, 'label': pred}

In [55]:
result = pd.DataFrame(dataframe)

In [56]:
result

Unnamed: 0,tweets,label
0,chatgpt optimizing language models dialogue op...,0
1,try talking chatgpt new optimized dialogue fee...,2
2,chatgpt optimizing language models dialogue ma...,2
3,thrilled share chatgpt new model optimized dia...,1
4,minutes ago openai released new chatgpt nnand ...,1
...,...,...
219275,software projects trying replicate chatgpt,0
219276,asked chatgpt write nye joke seos delivered nn...,1
219277,chatgpt disassembled dissemble,0
219278,predictions chatgpt specific trends past years...,1


In [57]:
result.head

<bound method NDFrame.head of                                                    tweets  label
0       chatgpt optimizing language models dialogue op...      0
1       try talking chatgpt new optimized dialogue fee...      2
2       chatgpt optimizing language models dialogue ma...      2
3       thrilled share chatgpt new model optimized dia...      1
4       minutes ago openai released new chatgpt nnand ...      1
...                                                   ...    ...
219275         software projects trying replicate chatgpt      0
219276  asked chatgpt write nye joke seos delivered nn...      1
219277                     chatgpt disassembled dissemble      0
219278  predictions chatgpt specific trends past years...      1
219279                                 chatgpt neat stuff      1

[219280 rows x 2 columns]>

In [60]:
result['label'] = result['label'].replace({0: 'negativ', 1: 'neutral', 2: 'positiv'})

In [61]:
result

Unnamed: 0,tweets,label
0,chatgpt optimizing language models dialogue op...,negativ
1,try talking chatgpt new optimized dialogue fee...,positiv
2,chatgpt optimizing language models dialogue ma...,positiv
3,thrilled share chatgpt new model optimized dia...,neutral
4,minutes ago openai released new chatgpt nnand ...,neutral
...,...,...
219275,software projects trying replicate chatgpt,negativ
219276,asked chatgpt write nye joke seos delivered nn...,neutral
219277,chatgpt disassembled dissemble,negativ
219278,predictions chatgpt specific trends past years...,neutral


In [62]:
result.to_csv('datasets/sentiment_chatgptfirst.csv')

### Predicting sentiment of last two months of release of chatGPT

In [63]:
df = pd.read_csv('datasets/preprocessed_chatgpt2last.csv')

In [64]:
df.head

<bound method NDFrame.head of         Unnamed: 0                                               text
0              153  fake chatgpts flood app store chatgpt appstore...
1              154  republicans release aigenerated attack preside...
2              155  sell rsi bybit recommendation short ticker zrx...
3              156  interesting chatgpt kept failing test promptge...
4              157  favourite chatgpt prompt day day act end devel...
...            ...                                                ...
188573      190488  good bad ugly typical chatgpt video chatgpt mi...
188574      190489  check new free tradingstrategy chatgpt alligat...
188575      190490                             new enemy todaychatgpt
188576      190491  cutie girl degenai altcoins altcoin degenai ge...
188577      190492  degenai amazing instant chat bot fast generate...

[188578 rows x 2 columns]>

In [65]:
# check if there is any NaN value

df.isnull().values.any()
df.isnull().sum()

Unnamed: 0    0
text          3
dtype: int64

In [66]:
# drop the NaN values if any

df = df.dropna()

In [67]:
df.isnull().values.any()
df.isnull().sum()

Unnamed: 0    0
text          0
dtype: int64

In [68]:
df.columns

Index(['Unnamed: 0', 'text'], dtype='object')

In [69]:
df = df.drop('Unnamed: 0', axis=1)

In [70]:
df.columns

Index(['text'], dtype='object')

In [71]:
df.head

<bound method NDFrame.head of                                                      text
0       fake chatgpts flood app store chatgpt appstore...
1       republicans release aigenerated attack preside...
2       sell rsi bybit recommendation short ticker zrx...
3       interesting chatgpt kept failing test promptge...
4       favourite chatgpt prompt day day act end devel...
...                                                   ...
188573  good bad ugly typical chatgpt video chatgpt mi...
188574  check new free tradingstrategy chatgpt alligat...
188575                             new enemy todaychatgpt
188576  cutie girl degenai altcoins altcoin degenai ge...
188577  degenai amazing instant chat bot fast generate...

[188575 rows x 1 columns]>

In [74]:
df.isnull().values.any()
df.isnull().sum()

text    0
dtype: int64

In [91]:
# use the pretrained model 
model = load_model('model/cnnkeras.h5', compile=False)
model.compile(optimizer="adam",
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Print the summary
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 100)          11683700  
                                                                 
 conv1d (Conv1D)             (None, 99, 32)            6432      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 49, 32)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 48, 32)            2080      
                                                                 
 dropout (Dropout)           (None, 48, 32)            0         
                                                                 
 conv1d_2 (Conv1D)           (None, 47, 16)            1040      
                                                        

In [81]:
df.values

array([['fake chatgpts flood app store chatgpt appstore apple chatgptnews gpt artificialintelligence ainews aiupdate appstorescam applescam dailynews'],
       ['republicans release aigenerated attack president biden twiceweekly news guide tracks largelanguagemodels useful tools chatgpt bard bing update sunday'],
       ['sell rsi bybit recommendation short ticker zrxusdt minute candle price rsi powered chatgpt zrx zrx'],
       ...,
       ['new enemy todaychatgpt'],
       ['cutie girl degenai altcoins altcoin degenai gems imgnai crypto cai cryptoai chatgpt cryptocurrency'],
       ['degenai amazing instant chat bot fast generated words degenai altcoins altcoin degenai gems imgnai crypto cai cryptoai chatgpt cryptocurrency']],
      dtype=object)

In [101]:
len(df)

188575

In [89]:
len(df.values)

188575

In [87]:
temp = df.values.ravel()

In [88]:
len(temp)

188575

In [90]:
type(temp)

numpy.ndarray

In [97]:
tokenizer = Tokenizer(num_words=50000)
tokenizer.fit_on_texts(temp)

tweets = tokenizer.texts_to_sequences(df.values.ravel())

vocab_size = len(tokenizer.word_index) + 1
maxlen = 100

tweets = pad_sequences(tweets, padding='post', maxlen=maxlen)

word_index = tokenizer.word_index

In [98]:
tweets

array([[  362,   286,  6541, ...,     0,     0,     0],
       [ 8370,   514,   583, ...,     0,     0,     0],
       [  161,    19,   137, ...,     0,     0,     0],
       ...,
       [    6,  6688, 38362, ...,     0,     0,     0],
       [34872,  2281,  2122, ...,     0,     0,     0],
       [ 2122,   110,  2048, ...,     0,     0,     0]])

In [99]:
# we have that :
# 2    positiv
# 1    neutral
# 0    negativ
pred = np.argmax(model.predict(tweets), axis=-1)



In [100]:
pred

array([0, 2, 2, ..., 0, 1, 2], dtype=int64)

In [102]:
len(pred)

188575

In [105]:
data = df.values.ravel()

In [106]:
dataframe = {'tweets': data, 'label': pred}

In [107]:
result = pd.DataFrame(dataframe)

In [108]:
result

Unnamed: 0,tweets,label
0,fake chatgpts flood app store chatgpt appstore...,0
1,republicans release aigenerated attack preside...,2
2,sell rsi bybit recommendation short ticker zrx...,2
3,interesting chatgpt kept failing test promptge...,2
4,favourite chatgpt prompt day day act end devel...,1
...,...,...
188570,good bad ugly typical chatgpt video chatgpt mi...,1
188571,check new free tradingstrategy chatgpt alligat...,2
188572,new enemy todaychatgpt,0
188573,cutie girl degenai altcoins altcoin degenai ge...,1


In [109]:
result['label'] = result['label'].replace({0: 'negativ', 1: 'neutral', 2: 'positiv'})

In [110]:
result

Unnamed: 0,tweets,label
0,fake chatgpts flood app store chatgpt appstore...,negativ
1,republicans release aigenerated attack preside...,positiv
2,sell rsi bybit recommendation short ticker zrx...,positiv
3,interesting chatgpt kept failing test promptge...,positiv
4,favourite chatgpt prompt day day act end devel...,neutral
...,...,...
188570,good bad ugly typical chatgpt video chatgpt mi...,neutral
188571,check new free tradingstrategy chatgpt alligat...,positiv
188572,new enemy todaychatgpt,negativ
188573,cutie girl degenai altcoins altcoin degenai ge...,neutral


In [111]:
result.to_csv('datasets/sentiment_chatgpt2last.csv')