PROJETO - WORDCLOUD MAMA'S GUNS

Bibliotecas a serem utilizadas:
- RE
- Requests
- WordCloud
- Matplotlib
- Numpy
- PIL


In [40]:
import re 
import requests
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import numpy as np
from PIL import Image

In [7]:
# Abrir o txt como uma variavel
with open ('./MamasGunLyrics.txt', 'r') as lyric:
    words = lyric.read()

print("Quantidade de palavras: ", len(words))

Quantidade de palavras:  19845


In [9]:
# organizar numa lista de palavras
tokens = words.split()

# conversão em minusculo
for i in range(len(tokens)):
    tokens[i] = tokens[i].lower().strip()

print("Algumas palavras em lista para confirmar: ",tokens[:20])

Algumas palavras em lista para confirmar:  ["here's", 'my', 'philosophy', "livin'", 'in', 'a', 'penitentiary', 'brothers', 'all', 'on', 'the', 'corner', "tryin'", 'to', 'make', 'believe', 'turn', 'around', "ain't", 'got']


In [11]:
# Como o WordCloud não consegue utilizar as palavras em lista, é preciso adicionar tudo em uma grande string.
wordsTogether = " ".join(tokens)

print("Algumas palavras em lista para confirmar: ",wordsTogether[0:5000])

Algumas palavras em lista para confirmar:  here's my philosophy livin' in a penitentiary brothers all on the corner tryin' to make believe turn around ain't got no pot to pee make me mad when i see ya sad can't stand to see ya hustle doin' bad but you can't win when your will is weak when you're knocked on the ground evil, don't you test me evil, you won't win oh why world why world do want me to be so mad yeah so mad yeah oh why world why world do you want me to be so mad yeah so mad yeah i'm tellin' you the truth i hope i'm not being too cruel since you ain't playing by the rules i'm 'bout to kick you off your stool evil, don't you test me evil, you won't win oh why world why world do want me to be so mad yeah so mad yeah oh why world why world do you want me to be so mad yeah so mad yeah i am a warrior princess i have come from the other sun gather all of your members unite them as one move together in harmony build a bridge now destroy a bridge eliminate your weakness conquer evil,

In [None]:
# Configuração do WordCloud. StopWord e configurações graficas.
stopwords_custom = STOPWORDS.union({'is', 'is', 'said', 'how', 'but', 'my', 'make', 'move', 'said', 'nigga', 'will', 'see', 'oh', 'say', 'di', "ain't", 'da', 'cause', 'know'})

# Função para converter código hexadecimal para RGB
def hex_to_rgb(hex_code):
    hex_code = hex_code.lstrip('#')
    rgb = tuple(int(hex_code[i:i+2], 16) for i in (0, 2, 4))
    return rgb

background_color = hex_to_rgb('#340A14')

###
# Criando um gradiente de cores personalizado
colors = ["#A93830", "#F28E37", "#CD622B", '#88733C']
cmap = LinearSegmentedColormap.from_list("mycmap", colors)


wordcloud = WordCloud(background_color=background_color,
            stopwords = stopwords_custom,
            contour_width = 1,
            width=1600, height=800,
            colormap=cmap,
            contour_color = 'blue',
            font_path='./impact.ttf',
            random_state=50).generate(wordsTogether)

# Mostrando a imagem do wordcloud
plt.figure(figsize = (20, 10), facecolor = None)
plt.imshow(wordcloud, interpolation='bilinear')

# Remover os eixos do grafico
plt.axis("off")
plt.tight_layout(pad=0)

plt.show()

In [15]:
# Para realizar a contagem de quantidade das palavras, a gente pode utilizar a biblioteca Colletions, e sua função Counter

from collections import Counter

erykahwords_count = Counter(tokens)

erykahwords_count.most_common()

[('i', 155),
 ('you', 136),
 ('the', 83),
 ('to', 83),
 ('in', 81),
 ('and', 81),
 ('my', 76),
 ('love', 75),
 ('with', 71),
 ('me', 70),
 ('your', 70),
 ('it', 63),
 ("don't", 59),
 ('a', 58),
 ("i'm", 56),
 ('so', 45),
 ('but', 43),
 ('be', 36),
 ('no', 35),
 ('want', 35),
 ('let', 35),
 ("can't", 32),
 ('know', 31),
 ('on', 30),
 ("ain't", 30),
 ('go,', 30),
 ('why', 29),
 ('world', 29),
 ('yeah', 28),
 ('too', 28),
 ('what', 28),
 ('do', 27),
 ('life', 24),
 ('oh', 23),
 ('that', 23),
 ('he', 23),
 ('go', 22),
 ('all', 20),
 ('mad', 20),
 ('when', 20),
 ("it's", 20),
 ('alright', 20),
 ('make', 19),
 ('is', 18),
 ('one', 18),
 ('ooh,', 18),
 ('you,', 18),
 ('ya', 17),
 ('way', 17),
 ('cha', 17),
 ('if', 17),
 ("didn't", 16),
 ('him', 16),
 ('baby', 16),
 ('of', 15),
 ('where', 15),
 ('said', 15),
 ('cause', 15),
 ('need', 14),
 ("you'll", 14),
 ('got', 13),
 ('we', 13),
 ('up', 13),
 ('oh,', 13),
 ('they', 13),
 ('see', 12),
 ('back', 12),
 ('wanna', 12),
 ('bag', 12),
 ('mind', 11