In [1]:
from csv import reader
from time import time
import random

In [2]:
dataset = "/content/drive/MyDrive/the-reddit-climate-change-dataset-comments.csv"

In [3]:

class Clima:
  def __init__(self, dataset):
    with open(dataset) as f:
      self.reader = reader(f)
      self.rows = list(self.reader)
      self.header = self.rows[0]
      self.rows = self.rows[1:]
      self.id_to_row = {}
      self.sentiment_sorted = {}
      self.score = {}
      for row in self.rows:
        self.id_to_row[row[1]] = row
        self.score[int(row[9])] = row
        if row[8] == '':
          row[8] = 0.0
        else:
          row[8] = float(row[8])

        
          
  
  def get_message_from_id(self, id):
    for row in self.rows:
      if row[1] == id:
        return row
      else:
        return -1
  
  def get_message_from_id_fast(self, id):
    if id in self.id_to_row:
      return self.id_to_row[id]
    else:
      return -1

  
  def get_by_sentiment(self, sentiment_inf, sentiment_sup):
    sentiments = []
    for row in self.rows:
      if row[8] >= sentiment_inf and row[8] < sentiment_sup:
         sentiments.append(row)
    return sentiments
    



  def TwoScoreSum(self, target):
    for row1 in self.rows:
      for row2 in self.rows:
        if int(row1[9]) + int(row2[9]) == target:
          return [row1, row2]
    return -1

  def TwoScoreSum_fast(self, target):
    for current_number in self.score:
      y = target - current_number
      if y in self.score:
        return [self.score[y], self.score[current_number]]
    return -1





  

  





    
    







In [4]:
clima = Clima(dataset)

# Comparando desempenho das funções:

In [6]:
ids = [random.randint(0, 100) for _ in range(100)]

In [7]:
for id in ids:
  %timeit -n 1000 clima.get_message_from_id(id)

394 ns ± 177 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
249 ns ± 4.37 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
244 ns ± 4.59 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
251 ns ± 14.3 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
247 ns ± 6.03 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
255 ns ± 14.4 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
249 ns ± 8.21 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
246 ns ± 6.31 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
282 ns ± 77.9 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
255 ns ± 16.3 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
247 ns ± 8.3 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
The slowest run took 4.27 times longer than the fastest. This could mean that an intermediate result is being cached.
648 ns ± 459 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
256 ns ± 8.64 ns per 

In [8]:
for id in ids:
   %timeit -n 1000 clima.get_message_from_id_fast(id)

178 ns ± 9.29 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
180 ns ± 10.4 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
180 ns ± 11.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
177 ns ± 2.49 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
179 ns ± 5.52 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
177 ns ± 5.17 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
176 ns ± 7.59 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
177 ns ± 4.99 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
167 ns ± 3.41 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
185 ns ± 12.8 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
186 ns ± 4.74 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
176 ns ± 3.87 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
187 ns ± 4.27 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
176 ns ± 4.73 ns per loop (mean ± std. dev. of 7 runs, 1000 loop

In [9]:
sums = [random.randint(1,100) for _ in range(75)]

In [10]:
for sum in sums:
   %timeit -n 1000 clima.TwoScoreSum(sum)

452 µs ± 34.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
942 µs ± 11.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
355 µs ± 8.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.29 µs ± 410 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.67 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
37.2 µs ± 7.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
45.5 µs ± 2.38 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.38 ms ± 20.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
196 µs ± 7.09 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
9.14 µs ± 344 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
43.3 µs ± 1.93 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.73 ms ± 26.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
15.5 µs ± 684 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
400 µs ± 7.44 µs per loop (mean ± std. dev. of 7 runs, 1000

In [11]:
for sum in sums:
   %timeit -n 1000 clima.TwoScoreSum_fast(sum)

799 ns ± 181 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
440 ns ± 13.9 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
439 ns ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
439 ns ± 11.9 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
433 ns ± 6.45 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
447 ns ± 23.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
430 ns ± 4.27 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
444 ns ± 37.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
443 ns ± 13.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
437 ns ± 7.9 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
398 ns ± 9.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
433 ns ± 8 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
426 ns ± 14.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
430 ns ± 26.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
