In [1]:
import numpy as np
import pandas as pd

from numba import njit, prange, double

In [2]:
# Seed NumPy's global random state so that runs are reproducible.
np.random.seed(123)

# Numba uses its own random state, which can become unsafe under
# parallelism, so let's avoid randomness inside numba code entirely.

# Explore database

It contains the following 6 fields:

**target**: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive). No neutral entry is present.

**ids**: The id of the tweet (2087)

**date**: the date of the tweet (Sat May 16 23:58:44 UTC 2009)

**flag**: The query (lyx). If there is no query, then this value is NO_QUERY.

**user**: the user that tweeted (robotickilldozr)

**text**: the text of the tweet (Lyx is cool)

In [3]:
# Column names to assign, in file order (passed to read_csv via `names`).
headers = ['target', 'ids', 'date', 'flag', 'user', 'text']

df = pd.read_csv(
    "Sentiment140.csv",
    names=headers,
    encoding="latin",
)
df.head()

Unnamed: 0,target,ids,date,flag,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [16]:
# We wish to predict sentiment based only on the text, so keep just the label
# and the tweet body. Take an explicit copy: later cells assign into this frame
# with .loc / column assignment, and writing into a slice of the original
# frame triggers pandas' SettingWithCopyWarning.
df = df[['target', 'text']].copy()
df

Unnamed: 0,target,text
0,0,@ awww thats a bummer you shoulda got david ca...
1,0,is upset that he cant update his facebook by t...
2,0,@ i dived many times for the ball managed to s...
3,0,my whole body feels itchy and like its on fire
4,0,@ no its not behaving at all im mad why am i h...
...,...,...
1599995,1,just woke up having no school is the best feel...
1599996,1,thewdbcom very cool to hear old walt interview...
1599997,1,are you ready for your mojo makeover ask me fo...
1599998,1,happy 38th birthday to my boo of alll time tup...


Now that we have narrowed down the useful columns, let's normalize the data.

In [5]:
# Show the distinct label values present in the target column.
df['target'].unique()

array([0, 4])

In [6]:
# Normalize the target labels to {0, 1}: label 0 is left as it is, so only
# label 4 has to be rewritten to 1. Re-running this cell leaves the column
# unchanged.
df['target'] = df['target'].replace(4, 1)
df['target'].unique()

array([0, 1])

Normalizing the messages will be trickier. Let's start by removing links and punctuation, and by condensing every mention into a single @.

In [7]:
import re

# Patterns and translation table are built once here instead of on every call;
# normalize_text is mapped over every row of the dataset.
# Raw strings avoid the invalid-escape-sequence warnings that "\w" / "\S"
# raise in ordinary string literals.
_MENTION_RE = re.compile(r"@\w+")
_LINK_RE = re.compile(r"http\S+|www\.\S+")
_WHITESPACE_RE = re.compile(r"\s+")
# '@' is absent from the removed characters, so the condensed mention
# marker survives punctuation stripping.
_PUNCTUATION_TABLE = str.maketrans('', '', '!"#$%&\'()*+,-./:;<=>?[\\]^_`{|}~')


def normalize_text(text):
    """Return a cleaned, lower-cased version of a tweet.

    Steps, in order:
      1. condense every ``@mention`` into a bare ``@``;
      2. remove links (anything starting with ``http`` or ``www.``);
      3. strip punctuation (everything in the table above; ``@`` is kept);
      4. collapse runs of whitespace and trim both ends;
      5. lower-case the result.

    Args:
        text: the raw tweet as a ``str``.

    Returns:
        The normalized tweet as a ``str`` (possibly empty).
    """
    # condense mentions
    text = _MENTION_RE.sub("@", text)
    # remove links
    text = _LINK_RE.sub("", text)
    # remove punctuation
    text = text.translate(_PUNCTUATION_TABLE)
    # clean up extra whitespace left behind by the removals
    text = _WHITESPACE_RE.sub(' ', text).strip()

    return text.lower()
    
# Clean every tweet in place of the raw text column, then preview the result.
df['text'] = df['text'].apply(normalize_text)
df.head()

Unnamed: 0,target,text
0,0,@ awww thats a bummer you shoulda got david ca...
1,0,is upset that he cant update his facebook by t...
2,0,@ i dived many times for the ball managed to s...
3,0,my whole body feels itchy and like its on fire
4,0,@ no its not behaving at all im mad why am i h...


In [10]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization and vectorization: fit a Tokenizer (default settings) on the
# cleaned text, then convert each tweet into a sequence of integers.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])

# Padding: find the longest sequence and post-pad all the others to that length.
max_length = max(map(len, sequences))
padded = pad_sequences(sequences, padding='post', maxlen=max_length)

In [14]:
# Report the number of distinct ids in the padded matrix and its width.
# NOTE(review): np.unique(padded) also counts the padding id, so the first
# figure is presumably one larger than the tokenizer's vocabulary - confirm.
print(f"""
vocabury size = {len(np.unique(padded))}
text max size = {padded.shape[1]}
""")


vocabury size = 450311
text max size = 40



# SCNN

In [None]:
# activation function
@njit
def activation(x):
    """Logistic sigmoid, ``1 / (1 + exp(-x))``.

    Args:
        x: pre-activation value(s); a float or a NumPy float array.

    Returns:
        The sigmoid of ``x``, element-wise, with the same shape as ``x``.
    """
    return 1 / (1 + np.exp(-x))

@njit
def derivated(x):
    """Return ``x * (1 - x)``, element-wise for array input.

    NOTE(review): this equals the sigmoid's derivative only when ``x`` is
    already the sigmoid *output* - confirm that callers pass activations.
    """
    complement = 1 - x
    return x * complement

In [None]:
# propagation
@njit
def foward():
    """Placeholder stub: takes no arguments and returns None."""
    return None

@njit
def backward():
    """Placeholder stub: takes no arguments and returns None."""
    return None

In [None]:
@njit(double[:](double[:], double[:]), parallel = True)
def extract_swarm_features(input_layer, swarm_filter):
    """Compute one feature per entry of ``swarm_filter``.

    For each index ``i``, the feature is the mean of
    ``input_layer * swarm_filter[i]``. The loop runs under ``prange``.

    Args:
        input_layer: 1-D float64 array (enforced by the explicit signature).
        swarm_filter: 1-D float64 array (enforced by the explicit signature).

    Returns:
        1-D float64 array with the same length as ``swarm_filter``.

    NOTE(review): with a 1-D filter each ``swarm_filter[i]`` is a scalar, so
    the result equals ``swarm_filter * input_layer.mean()``. If every filter
    was meant to be a vector, the signature and the output shape need to
    change - confirm the intended layout.
    """
    n_filters = swarm_filter.shape[0]
    features = np.empty(n_filters)
    for idx in prange(n_filters):
        weighted = input_layer * swarm_filter[idx]
        features[idx] = weighted.mean()
    return features



In [None]:
class NeuralNetwork:
    """Skeleton of a layered neural network.

    Only the constructor is functional. ``add_layer`` and ``train`` are not
    implemented yet and raise ``NotImplementedError``; ``predict`` is a
    placeholder that always returns ``None``.
    """

    def __init__(
        self,
        layer_sizes,
        layer_activation,
        weights,
        biases,
        learning_rate
    ):
        """Store the network configuration as given, without validation.

        Args:
            layer_sizes: stored on ``self.layer_sizes``.
            layer_activation: stored on ``self.layer_activation``.
            weights: stored on ``self.weights``.
            biases: stored on ``self.biases``.
            learning_rate: stored on ``self.learning_rate``.
        """
        self.layer_sizes = layer_sizes
        self.layer_activation = layer_activation
        self.weights = weights
        self.biases = biases
        self.learning_rate = learning_rate

    def add_layer(self, layer_type, size, activation="sigmoid"):
        """Append a layer to the network (not implemented yet).

        Args:
            layer_type: kind of layer to add.
            size: size of the new layer.
            activation: name of the layer's activation; defaults to "sigmoid".

        Raises:
            NotImplementedError: always, until the method is written.
        """
        # TODO: extend layer_sizes / layer_activation / weights / biases.
        raise NotImplementedError("NeuralNetwork.add_layer is not implemented yet")

    def train(self, X, y, epochs):
        """Train the network for ``epochs`` passes (not implemented yet).

        Args:
            X: training inputs.
            y: training targets.
            epochs: number of passes over the data.

        Raises:
            NotImplementedError: always, until the method is written.
        """
        # TODO: for each epoch, run the forward and backward passes.
        raise NotImplementedError("NeuralNetwork.train is not implemented yet")

    def predict(self, X):
        """Placeholder prediction: ignores ``X`` and returns ``None``."""
        y = None
        return y