In [2]:
from csv import reader
from time import time
import random

In [3]:
dataset = "/content/drive/MyDrive/the-reddit-climate-change-dataset-comments.csv"

In [4]:

class Clima:
  def __init__(self, dataset):
    with open(dataset) as f:
      self.reader = reader(f)
      self.rows = list(self.reader)
      self.header = self.rows[0]
      self.rows = self.rows[1:]
      self.id_to_row = {}
      self.sentiment_sorted = {}
      self.score = {}
      for row in self.rows:
        self.id_to_row[row[1]] = row
        self.score[int(row[9])] = row
        if row[8] == '':
          row[8] = 0.0
        else:
          row[8] = float(row[8])

        
          
  
  def get_message_from_id(self, id):
    for row in self.rows:
      if row[1] == id:
        return row
      else:
        return -1
  
  def get_message_from_id_fast(self, id):
    if id in self.id_to_row:
      return self.id_to_row[id]
    else:
      return -1

  
  def get_by_sentiment(self, sentiment_inf, sentiment_sup):
    sentiments = []
    for row in self.rows:
      if row[8] >= sentiment_inf and row[8] < sentiment_sup:
         sentiments.append(row)
    return sentiments
    



  def TwoScoreSum(self, target):
    for row1 in self.rows:
      for row2 in self.rows:
        if row1[9] + row2[9] == target:
          return [row1, row2]
        return -1

  def TwoScoreSum_fast(self, target):
    for current_number in self.score:
      y = target - current_number
      if y in self.score:
        return [self.score[y], self.score[current_number]]
    return -1





  

  





    
    







In [5]:
clima = Clima(dataset)

In [6]:
clima.get_message_from_id_fast('imlddn9')

['comment',
 'imlddn9',
 '2qh3l',
 'news',
 'false',
 '1661990368',
 'https://old.reddit.com/r/news/comments/x2cszk/us_life_expectancy_down_for_secondstraight_year/imlddn9/',
 'Yeah but what the above commenter is saying is their base doesn’t want any of that. They detest all of those things, even the small gradual changes. Investing in nuclear energy is a tacit acknowledgement of man made climate change. Any acknowledgement or concession and they will be primaried out in a minute',
 0.5719,
 '2']

In [7]:
test = clima.get_by_sentiment(0.64, 0.68)
len(test)

77172

In [8]:
clima.TwoScoreSum_fast(25)

[['comment',
  'c0mvyui',
  '2qj8f',
  'skeptic',
  'false',
  '1269542958',
  'https://old.reddit.com/r/skeptic/comments/bi7mu/are_there_any_redditors_on_rskeptic_who_dont/c0mvyui/',
  'I think the anthropogenic global warming/climate change issue is taken all wrong. When you take either side of the argument, you\'re stuck arguing scientific facts from studies that can be interpreted different ways, and then getting into political debates etc...it isn\'t a very productive debate to have.\n\nI think the better way to handle the situation is to make the argument for being more "green" because it means expending less energy. This, in turns, means spending less money on energy. Switching to a more fuel efficient vehicle makes financial sense because it will cost you less in gas. This is something much less politically charged, and much more persuasive. You can then throw in the "And while you\'re saving money, you can also put less pollution into the world".\n\nOr am I just crazy?',
  -0.

# Comparando desempenho das funções:

In [9]:
ids = [random.randint(0, 100) for _ in range(100)]

In [10]:
for id in ids:
  %timeit -n 1000 clima.get_message_from_id(id)

502 ns ± 5.78 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
506 ns ± 13.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
516 ns ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
499 ns ± 4.98 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
500 ns ± 6.03 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
498 ns ± 16.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
505 ns ± 7.09 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
508 ns ± 10.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
462 ns ± 55.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
431 ns ± 25.6 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
455 ns ± 23.3 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
455 ns ± 15 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
450 ns ± 21.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
472 ns ± 10.7 ns per loop (mean ± std. dev. of 7 runs, 1000 loops 

In [11]:
for id in ids:
   %timeit -n 1000 clima.get_message_from_id_fast(id)

The slowest run took 5.00 times longer than the fastest. This could mean that an intermediate result is being cached.
890 ns ± 469 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
The slowest run took 7.15 times longer than the fastest. This could mean that an intermediate result is being cached.
398 ns ± 391 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
189 ns ± 9.63 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
181 ns ± 2.39 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
189 ns ± 3.26 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
187 ns ± 11 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
192 ns ± 3.86 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
196 ns ± 10.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
184 ns ± 5.55 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
188 ns ± 3.19 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
184 ns ± 8.48 ns per loop (mean ± std. dev. of 7

In [12]:
sums = [random.randint(1,100) for _ in range(75)]

In [13]:
for sum in sums:
   %timeit -n 1000 clima.TwoScoreSum(sum)

421 ns ± 24.4 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
618 ns ± 253 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
399 ns ± 19.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
409 ns ± 5.17 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
404 ns ± 9.59 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
396 ns ± 11.4 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
415 ns ± 10.9 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
434 ns ± 6.05 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
409 ns ± 4.14 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
390 ns ± 5.66 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
431 ns ± 16.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
430 ns ± 60.6 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
554 ns ± 77.8 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
The slowest run took 18.75 times longer than the fastest. This co

In [14]:
for sum in sums:
   %timeit -n 1000 clima.TwoScoreSum_fast(sum)

625 ns ± 234 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
540 ns ± 133 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
540 ns ± 138 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
515 ns ± 112 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
439 ns ± 5.65 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
443 ns ± 13.1 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
440 ns ± 7.43 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
436 ns ± 4.58 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
435 ns ± 4.7 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
529 ns ± 111 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
442 ns ± 12.5 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
481 ns ± 13.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
576 ns ± 129 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
440 ns ± 12 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4

In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
