# Graded assignment 1 - text classification using Genetic Algorithms
## By Abdullah Karagøz

In this assignmetn we'll make a binary text classifier using genetic algorithms. We will classify movie reviews from IMDB as either negative or positive. This task consists of several steps:

1. Preprocessing of the text
2. Genetich Algorithm
3. Validation



## 1. Preprocessing

In [195]:
## Upload the text
import numpy as np
import pandas as pd
import nltk
import string
import re
import math
import random
from scipy import special
nltk.download('wordnet')
nltk.download('punkt')
from nltk.corpus import PlaintextCorpusReader
from nltk import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer 

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\abdka\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\abdka\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [197]:
# File directories
corpus_train_pos_root = 'aclImdb/train/pos/'
corpus_train_neg_root = 'aclImdb/train/neg/'
corpus_test_pos_root = 'aclImdb/test/pos/'
corpus_test_neg_root = 'aclImdb/test/neg/'

# Corpus file objects
files_train_pos = PlaintextCorpusReader(corpus_train_pos_root, '.*')
files_train_neg = PlaintextCorpusReader(corpus_train_neg_root, '.*')
files_test_pos = PlaintextCorpusReader(corpus_test_pos_root, '.*')
files_test_neg = PlaintextCorpusReader(corpus_test_neg_root, '.*')


# Getting review texts, labels and rates all in arrays
reviews_train_pos = [files_train_pos.open(n).read() for n in files_train_pos.fileids()]
rates_train_pos = [int(re.split("_|\.", n)[-2]) for n in files_train_pos.fileids()]
labels_train_pos = [np.array([1, 0])] * len(reviews_train_pos)

reviews_train_neg = [files_train_neg.open(n).read() for n in files_train_neg.fileids()]
rates_train_neg = [int(re.split("_|\.", n)[-2]) for n in files_train_neg.fileids()]
labels_train_neg = [np.array([0, 1])] * len(reviews_train_neg)

reviews_test_pos = [files_test_pos.open(n).read() for n in files_test_pos.fileids()]
rates_test_pos = [int(re.split("_|\.", n)[-2]) for n in files_test_pos.fileids()]
labels_test_pos = [np.array([1, 0])] * len(reviews_test_pos)

reviews_test_neg = [files_test_neg.open(n).read() for n in files_test_neg.fileids()]
rates_test_neg = [int(re.split("_|\.", n)[-2]) for n in files_test_neg.fileids()]
labels_test_neg = [np.array([0, 1])] * len(reviews_test_neg)

In [198]:
# Putting all into two Pandas dataframes - training set and testing set
train_set = pd.DataFrame()
test_set = pd.DataFrame()

train_set['review'] = reviews_train_pos + reviews_train_neg
train_set['rate'] = rates_train_pos + rates_train_neg
train_set['label'] = labels_train_pos + labels_train_neg

test_set['review'] = reviews_test_pos + reviews_test_neg
test_set['rate'] = rates_test_pos + rates_test_neg
test_set['label'] = labels_test_pos + labels_test_neg


In [199]:
# I think to put this into own .PY file and import from there
class text_preprocessor():
    def __init__(self):
        import nltk
        import re
        import string
        
        nltk.download('stopwords')
        nltk.download('wordnet')
        from nltk.corpus import stopwords
        ", ".join(stopwords.words('english'))
        from nltk.stem.wordnet import WordNetLemmatizer 
        
        self.stop_words = set(stopwords.words('english'))
        
        self.punctuation = string.punctuation
        
        self.emoji_pattern = re.compile("["
                                u"\U0001F600-\U0001F64F"  # emoticons
                                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                                u"\U00002702-\U000027B0"
                                u"\U000024C2-\U0001F251"
                                "]+", flags=re.UNICODE)
        
        # src : https://github.com/NeelShah18/emot/blob/master/emot/emo_unicode.py
        self.emoticons = {
            u":‑\)":"Happy face or smiley",
            u":\)":"Happy face or smiley",
            u":-\]":"Happy face or smiley",
            u":\]":"Happy face or smiley",
            u":-3":"Happy face smiley",
            u":3":"Happy face smiley",
            u":->":"Happy face smiley",
            u":>":"Happy face smiley",
            u"8-\)":"Happy face smiley",
            u":o\)":"Happy face smiley",
            u":-\}":"Happy face smiley",
            u":\}":"Happy face smiley",
            u":-\)":"Happy face smiley",
            u":c\)":"Happy face smiley",
            u":\^\)":"Happy face smiley",
            u"=\]":"Happy face smiley",
            u"=\)":"Happy face smiley"
        }

    def lower_case(self, text):
        return str.lower(text)
    
    def remove_punctuation(self, text):
        return text.translate(str.maketrans('', '', self.punctuation))
    
    def remove_stopwords(self, text):
        return " ".join([word for word in str(text).split() if word not in self.stop_words])
    
    def remove_words(self, text, freq_words):
        return " ".join([word for word in str(text).split() if word not in freq_words])
    
    def remove_emoji(self, text):
        # src: https://gist.github.com/slowkow/7a7f61f495e3dbb7e3d767f97bd7304b
        return self.emoji_pattern.sub(r'', text)
    
    
    def remove_emoticons(self, text):
        import re
        # src : https://github.com/NeelShah18/emot/blob/master/emot/emo_unicode.py
        emoticon_pattern = re.compile(u'(' + u'|'.join(k for k in self.emoticons) + u')')
        return emoticon_pattern.sub(r'', text)
    
    def convert_emoticons(self, text):
        # src : https://github.com/NeelShah18/emot/blob/master/emot/emo_unicode.py
        for emot in self.emoticons:
            text = re.sub(u'('+emot+')', "_".join(self.emoticons[emot].replace(",","").split()), text)
        return text
    
    def lemmatization(self, text):
        lemmatizer = WordNetLemmatizer()
        tokens = word_tokenize(text)
        for i in ['v','n','a']:
            tokens = [lemmatizer.lemmatize(word, i) for word in tokens]
        text = " ".join(tokens)
        return text
    
    def expand_contractions(self, text):
        text = re.sub(r"i'm", " i am ", text)
        text = re.sub(r" im ", " i am ", text)
        text = re.sub(r"\: p", "", text)
        text = re.sub(r" ive ", " i have ", text)
        text = re.sub(r" he's ", " he is ", text)
        text = re.sub(r" she's ", " she is ", text)
        text = re.sub(r" that's ", " that is ", text)
        text = re.sub(r" what's ", " what is ", text)
        text = re.sub(r" where's ", " where is ", text)
        text = re.sub(r" haven't ", " have not ", text)
        text = re.sub(r" ur ", " you are ", text)
        text = re.sub(r"\'ll", " will", text)
        text = re.sub(r"\'ve", " have", text)
        text = re.sub(r"\'re", " are", text)
        text = re.sub(r"\'d", " would", text)
        text = re.sub(r" won't ", " will not ", text)
        text = re.sub(r" wouldn't ", " would not ", text)
        text = re.sub(r" can't ", " cannot ", text)
        text = re.sub(r" cannot ", " cannot ", text)
        text = re.sub(r" don't ", " do not ", text)
        text = re.sub(r" didn't ", " did not ", text)
        text = re.sub(r" doesn't ", " does not ", text)
        text = re.sub(r" isn't ", " is not ", text)
        text = re.sub(r" it's ", " it is ", text)
        text = re.sub(r" who's ", " who is ", text)
        text = re.sub(r" there's ", " there is ", text)
        text = re.sub(r" weren't ", " were not ", text)
        text = re.sub(r" okay ", " o", text)
        text = re.sub(r" you're ", " you are ", text)
        text = re.sub(r" c'mon ", " come on ", text)
        text = re.sub(r"in'", "ing", text)
        text = re.sub(r"\'s", " s", text)
        return text
    
    def remove_urls(self, text):
        url_pattern = re.compile(r'https?://\S+|www\.\S+')
        return url_pattern.sub(r'', text)


In [200]:
# preprocessing function
def preprocess_imdb_reviews(preprocessor, df):
    df['review'] = df['review'].apply(lambda text: preprocessor.lower_case(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.expand_contractions(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_urls(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_emoji(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_punctuation(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_stopwords(text))
    df['review'] = df['review'].apply(lambda text: preprocessor.lemmatization(text))


    from collections import Counter
    cnt = Counter()
    cnt2 = Counter()
    for text in df["review"].values:
        # Counting the words
        for word in text.split():
            cnt[word] += 1
        # Counting in how many reviews the word appears
        for word in set(text.split()):
            cnt2[word] += 1

    # Removing most frequent words
    n_freq_words = 0
    freq_words = set([w for (w, wc) in cnt.most_common(n_freq_words)])
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_words(text, freq_words))

    # Removing rarest words
    n_rare_words = 0
    rare_words = set([w for (w, wc) in cnt.most_common()[:-n_rare_words-1:-1]])
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_words(text, rare_words))
    
    
    # Remove words used in >90% and <5% of the reviews
    curb_max_amount = len(df) * 0.90
    curb_min_amount = len(df) * 0.05

    curb_words = set([w for (w, wc) in cnt2.most_common() if wc > curb_max_amount or wc < curb_min_amount])
    df['review'] = df['review'].apply(lambda text: preprocessor.remove_words(text, curb_words))
    
    return df

len(train_set)

25000

In [201]:
#Using the preprocessor
preprocessor = text_preprocessor()
train_set_processed = preprocess_imdb_reviews(preprocessor, train_set)
test_set_processed = preprocess_imdb_reviews(preprocessor, test_set)
train_set_processed

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\abdka\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\abdka\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Unnamed: 0,review,rate,label
0,high comedy run time life year lead believe hi...,9,"[1, 0]"
1,year never help consider human everything go w...,8,"[1, 0]"
2,best ever see love scene second face classic g...,10,"[1, 0]"
3,film sure give view say give view give view ma...,7,"[1, 0]"
4,film much le movie actually plot make movie ac...,8,"[1, 0]"
...,...,...,...
24995,end movie felt felt like watch like see guess ...,4,"[0, 1]"
24996,kind movie watch time true watch make sure bad...,3,"[0, 1]"
24997,saw last night film one see small movie fan so...,3,"[0, 1]"
24998,film turn rather good film release american mo...,1,"[0, 1]"


In [202]:
# FUNCTIONS TO VECTORIZE

# Get the IDF values and VOCAB vector
def get_idf_vocab(df):
    from collections import Counter
    import math
    
    # Prepare the vocab
    vocab = set(" ".join(train_set_processed['review'].values).split())
    vocab = dict.fromkeys(vocab, 0)
    vocab.update((k, i) for i, k in enumerate(vocab))
    
    # Counting how many reviews a word appears in
    cnt = Counter()
    for text in df["review"].values:
        for word in set(text.split()):
            cnt[word] += 1
    # Preparing the IDF vector
    size = len(df)
    idfs = dict()
    for w, c in cnt.items():
        idfs[w] = 0 if c == 0 else math.log(size / c)
    
    return idfs, vocab


# TF-IDF vectorize a single text, returning a np.array
def tf_idf_vectorize(text, idfs, vocab):
    freq_dist = nltk.FreqDist(text.split())
    
    vector = np.zeros(len(vocab))
    
    for w, c in freq_dist.items():
        if w in vocab:
            vector[vocab[w]] = c * idfs[w]
    
    return vector

# Vectorize all in the dataset
def tf_idf_vectorize_all(df, idfs, vocab):
    df['vectors'] = df['review'].apply(lambda text: tf_idf_vectorize(text, idfs, vocab))
    return df
    

# Get the IDF values and VOCAB vector
idfs, vocab = get_idf_vocab(train_set_processed)

# Vectorizing train set and test set
train_set_processed = tf_idf_vectorize_all(train_set_processed, idfs, vocab)
test_set_processed = tf_idf_vectorize_all(test_set_processed, idfs, vocab)
len(vocab)

339

## Classifier
There are a lot of classifiers. I haven't any experience in most of them, just a little familiar with Neural Network. When assigning fitness in GA I have to test-run all the 25k samples. If I have X number of classifiers and I'll run in N number of generations, then the complexity of running the GA will be at least like:
X * N * 25,000. That'll be a big number, and my computer is not that fast.

Taking hardware limitations into account, I wanted to choose a model that's quick to run. NN with multilayers may be too complicated and too slow to run. This make experimenting with different methods and tuning hyper-parameters very difficult.

After taking a quick look at this link:
https://medium.com/text-classification-algorithms/text-classification-algorithms-a-survey-a215b7ab7e2d

I decided a simple SVM  would be best to try using as classifier as it's quick to run.

In [203]:
class Classifier():
    def __init__(self, idfs, vocab):
        self.idfs = idfs
        self.vocab = vocab
    
    def relu(self, x):
        return (x > 0) * x 

    def sigmoid(self, x):
        from scipy import special
        return special.expit(x)
    
    def softmax(self, x):
        import numpy as np
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def cross_entropy(self, p, y):
        import numpy as np
        return -(sum(np.nan_to_num(y*np.log(p))))
    
    def predict_single_row(self, x, y, w_1, w_2):
        #forward pass/prediction
        layer_1 = self.relu(x.dot(w_1))

        layer_out = self.softmax(layer_1.dot(w_2))

        return layer_out
    
    def predict_whole_set(self, df, w_1, w_2):
        df['predictions'] = df.apply(lambda vec: self.predict_single_row(vec.vectors, vec.label, w_1, w_2), axis=1)
        df['CE-loss'] = df.apply(lambda x: self.cross_entropy(x.predictions, x.label), axis=1)
        mean_loss = df['CE-loss'].mean()
        return mean_loss, df
    

        

In [204]:
# Initializing weights
def init_weights():
    np.random.seed(5)
    hidden_nodes = 8
    ws_1 = np.random.rand(len(vocab), hidden_nodes) - 0.5
    ws_2 = np.random.rand(hidden_nodes, 2) - 0.5
    return ws_1, ws_2

ws_1, ws_2 = init_weights()

In [205]:
classifier = Classifier(idfs, vocab)

In [206]:
loss, output_df = classifier.predict_whole_set(test_set_processed, ws_1, ws_2)

In [207]:
print(loss)
output_df

1.7007500825724318


Unnamed: 0,review,rate,label,vectors,predictions,CE-loss
0,go saw movie last night friend see know comedy...,10,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 2.5764957613619086, ...","[0.9174607928881857, 0.08253920711181426]",0.086145
1,actor turn director follow family u open young...,7,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9405757566456582, 0.05942424335434174]",0.061263
2,early movie well fight young go eye american a...,9,"[1, 0]","[2.8383867028302134, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6642002324125545, 0.3357997675874454]",0.409172
3,saw film act good story movie well film could ...,8,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6296536964942897, 0.37034630350571035]",0.462585
4,take true story u open make film much film als...,8,"[1, 0]","[2.8383867028302134, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.7886726473959893, 0.21132735260401064]",0.237404
...,...,...,...,...,...,...
24995,let kid watch understand show minute br anyone...,1,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9649670765399989, 0.03503292346000097]",3.351467
24996,pretty much tv show people make reason can not...,1,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.999365090360261, 0.0006349096397389085]",7.362028
24997,try make lot use title bring small surprise ch...,3,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.976...","[0.9954322581746063, 0.004567741825393718]",5.388736
24998,thing film star always watch feature new film ...,3,"[0, 1]","[0.0, 0.0, 2.256847201320192, 0.0, 0.0, 0.0, 0...","[0.6864880335728475, 0.31351196642715246]",1.159918


## Genetic algorithm

In [208]:
class Chromosome:
    # x_pos and y_pos are the features of our chromosome
    def __init__(self, ws_1, ws_2):
        self._fitness = 0
        self._ws_1 = ws_1
        self._ws_2 = ws_2
    
    # Getters and setters
    @property
    def ws_1(self):
        return self._ws_1
    
    @ws_1.setter
    def ws_1(self, value):
        self._ws_1 = value
    
    @property
    def ws_2(self):
        return self._ws_2
    
    @ws_2.setter
    def ws_2(self, value):
        self._ws_2 = value
    
    @property
    def fitness(self):
        return self._fitness
    
    @fitness.setter
    def fitness(self, value):
        self._fitness = value
    
    def assign_fitness(self, classifier, data_set):
        import math
        loss, _ = classifier.predict_whole_set(data_set, self.ws_1, self.ws_2)
        self.fitness = 1/loss #0 if loss <= 0 or loss == float('inf') else -math.log(1 / loss)
    
    # produce a new offspring from 2 parents
    def crossover(self, other):
        r = 0
        
        min_mat_1 = np.minimum(self.ws_1, other.ws_1)
        max_mat_1 = np.maximum(self.ws_1, other.ws_1)
        min_mat_2 = np.minimum(self.ws_2, other.ws_2)
        max_mat_2 = np.maximum(self.ws_2, other.ws_2)
        
        ws_1 = np.random.uniform(min_mat_1-r, max_mat_1+r)
        ws_2 = np.random.uniform(min_mat_2-r, max_mat_2+r) 
        
        offspring = Chromosome(ws_1, ws_2)
        return offspring

    # mutate the individual
    def mutate(self):
        np.random.shuffle(self.ws_1)
        self.ws_1 = self.ws_1 + np.random.uniform(-1, 1, size=self.ws_1.shape)
        np.random.shuffle(self.ws_2)
        self.ws_2 = self.ws_2 + np.random.uniform(-1, 1, size=self.ws_2.shape)
        return
    

    



In [209]:
class GAEngine:
    def __init__(self):
        self.population = []
        self.food = []
        self.generations = 0

    def make_initial_population(self, population_size):       
        for i in range(population_size):
            ws_1, ws_2 = self.init_weights()
            self.population.append(Chromosome(ws_1, ws_2))
    
    def set_generations(self, g):
        self.generations = g
        return
        
    def get_generations(self):
        return self.generations

            
    # Initializing weights
    def init_weights(self):
        hidden_nodes = 8
        ws_1 = np.random.rand(len(vocab), hidden_nodes) - 0.5
        ws_2 = np.random.rand(hidden_nodes, 2) - 0.5
        return ws_1, ws_2
    

    # selection code goes here...
    def do_crossover(self, no_of_offspring):
        import random
        population_size = len(self.population)
        # Here we combine elitism selection with roulette wheel
        # We carry ca 40 % of the most fit over to the next generation.
        # Then we use roulette wheel because we want diversity too.
        # We want diversity because the foods are repositioned to different places.
        
        # Get the top ca 40 % fittest.
        rate_to_keep = 0.1
        keep_nr = int(population_size * rate_to_keep)
        self.population = sorted(self.population, key=lambda x: x.fitness)
        new_generation = self.population[-keep_nr:]
        
        # new_generation = list()

        # Then we make offsprings based on random choices with weights.
        # We raise the exponent fitness to 2 to make the differences more
        # Since we are using "roulette wheel" in selection, we want to increase the chance of
        # the most fit to be selected.
        fitness_values = [x.fitness**2 for x in self.population]
        for i in range(no_of_offspring):
            parent1, parent2 = random.choices(self.population, weights=fitness_values, k=2)
            offspring = parent1.crossover(parent2)
            new_generation.append(offspring)
        
        self.population = new_generation
        return
    
    
    # fitness calculation goes here...
    def assign_fitness(self, classifier, data_set):
        # Fitness is 987 substracted by distance to closest food
        # We want the chromosomes close to some food to survive, not close to all foods on average.
        nr_of_foods = len(self.food)
        for ch in self.population:
            ch.assign_fitness(classifier, data_set)
        return
    
    def get_population(self):
        return self.population
    
    def get_best_chromosome(self):
        ch = max(self.population, key=lambda x: x.fitness)
        return ch


In [210]:
# Here I run the GA engine
ga = GAEngine()
ga.make_initial_population(10)
ga.set_generations(10)


for i in range(ga.get_generations()):
    ga.assign_fitness(classifier, test_set_processed)
    # ca 60 % of population to do crossover
    no_of_crossover = int(0.6 * len(ga.get_population()))
    ga.do_crossover(no_of_crossover)


    # High number of mutations for more variations
    no_of_mutation = 2
    for i in range(no_of_mutation):
        index = random.randint(0, len(ga.get_population())-1)
        ga.get_population()[index].mutate()

ch = ga.get_best_chromosome()

In [211]:
print(ch.fitness)

1.1260330112122419


In [212]:
loss, output_df = classifier.predict_whole_set(test_set_processed, ch.ws_1, ch.ws_2)

In [213]:
print(loss)
output_df

0.8880734312783958


Unnamed: 0,review,rate,label,vectors,predictions,CE-loss
0,go saw movie last night friend see know comedy...,10,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 2.5764957613619086, ...","[0.3530779853400781, 0.6469220146599218]",1.041066
1,actor turn director follow family u open young...,7,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.5518464811056155, 0.4481535188943845]",0.594485
2,early movie well fight young go eye american a...,9,"[1, 0]","[2.8383867028302134, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.7373063721280751, 0.26269362787192485]",0.304752
3,saw film act good story movie well film could ...,8,"[1, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.4681015105176985, 0.5318984894823016]",0.759070
4,take true story u open make film much film als...,8,"[1, 0]","[2.8383867028302134, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.4171412605851502, 0.5828587394148498]",0.874330
...,...,...,...,...,...,...
24995,let kid watch understand show minute br anyone...,1,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.4797191937374103, 0.5202808062625897]",0.653387
24996,pretty much tv show people make reason can not...,1,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6747361708125804, 0.32526382918741953]",1.123119
24997,try make lot use title bring small surprise ch...,3,"[0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.976...","[0.37510529933786335, 0.6248947006621366]",0.470172
24998,thing film star always watch feature new film ...,3,"[0, 1]","[0.0, 0.0, 2.256847201320192, 0.0, 0.0, 0.0, 0...","[0.30186384610608336, 0.6981361538939167]",0.359341


In [66]:
import numpy as np
import math

mat1 = np.array([[5,5,5,2], [1,3,14,5], [-6,4,2,3]])
mat2 = np.array([[5,5,5,2], [1,3,14,5], [6,4,2,3]])

In [79]:
if (mat1 >= 0).any():
    print("aaa")
    
if (mat2 < 0).any():
    print("bbb")

aaa


In [None]:
#np.random.shuffle(mat1)
np.random.uniform(low=-2, high=1, size=(3,5))
mat3 = mat1 + np.random.uniform(-1,1,size=(mat1.shape))
mat3

In [1]:
class test(object):
    def __init__(self):
        self._ws_1 = None
    
    @property
    def ws_1(self):
        return self._ws_1 + 1
    
    @ws_1.setter
    def ws_1(self, x):
        self._ws_1 = x + 2
        
    def func(self, x):
        self.ws_1 = x

In [157]:
p = np.array([1,0,0,1])
y = np.array([1,1,0,0])

print(type(p))
summ = y*np.log(p)
print(summ)

sum(np.nan_to_num(summ))

<class 'numpy.ndarray'>
[  0. -inf  nan   0.]


  summ = y*np.log(p)
  summ = y*np.log(p)


-1.7976931348623157e+308

In [5]:
t.ws_1 = 5

print(t.ws_1)

t.func(-15)
print(t.ws_1)

8
-12
