# Visualization examples

In [None]:
!pip install seaborn matplotlib pandas numpy

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import seaborn as sns

%matplotlib inline

In [None]:
# Load the white-wine quality table from a semicolon-separated CSV file.
# Dataset: https://archive.ics.uci.edu/ml/datasets/wine+quality
white_wine = pd.read_csv(
    'winequality-white_new.csv',
    delimiter=';',
)

In [None]:
# Show the data: a bare expression on the last line of a cell is displayed
# as that cell's output.
white_wine

# Visualization with Pandas

## Simple scatter plot of two variables

In [None]:
# Scatter plot of citric acid against chlorides using the pandas plotting accessor.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html
white_wine.plot.scatter(x='citric acid', y='chlorides', color='green')

## Histogram of one column

In [None]:
# Histogram of the "fixed acidity" column, drawn with 60 bins.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.hist.html
white_wine["fixed acidity"].hist(
    bins=60,
    grid=True,
    color='blue',
    edgecolor='black',
    xlabelsize=12,
    ylabelsize=12,
)

## Histogram of all columns

In [None]:
# One histogram panel per column of the frame.
# The figure is sized explicitly through `figsize`; tight_layout()'s `rect`
# is expressed in normalized figure coordinates (0..1), so it is not the
# right knob for enlarging the plot.
white_wine.hist(
    bins=15,
    color='blue',
    edgecolor='black',
    xlabelsize=12,
    ylabelsize=12,
    grid=True,
    figsize=(18, 12),
)
# Keep tick labels and panel titles from overlapping.
plt.tight_layout()

# Visualization with Matplotlib and Seaborn

## Scatter plot

In [None]:
# Scatter plot of sulphates against alcohol, drawn on the current axes with
# semi-transparent markers outlined in white.
ax = plt.gca()
ax.scatter(
    white_wine['sulphates'],
    white_wine['alcohol'],
    alpha=0.4,
    edgecolors='w',
)
ax.set_xlabel('Sulphates')
ax.set_ylabel('Alcohol')
# y=1.05 lifts the title slightly above the top of the axes.
ax.set_title('Wine Sulphates - Alcohol Content', y=1.05)

## Continuous histogram (kernel density estimate)

In [None]:
# Kernel density estimate of the alcohol column on an explicitly created axes.
# https://seaborn.pydata.org/generated/seaborn.kdeplot.html
fig = plt.figure(figsize=(6, 4))
title = fig.suptitle("Alcohol", fontsize=14)
fig.subplots_adjust(top=0.85, wspace=0.3)

ax1 = fig.add_subplot(1, 1, 1)
ax1.set_xlabel("Alcohol")
# The vertical axis of a KDE plot is the estimated density of the variable,
# not the alcohol content itself.
ax1.set_ylabel("Density")
# `shade` is deprecated in seaborn in favour of `fill` (available since 0.11).
sns.kdeplot(white_wine['alcohol'], ax=ax1, fill=True, color='blue')

## Box plot

In [None]:
# Box plot of alcohol for each value of the quality column, on a 5x5-inch figure.
f, ax = plt.subplots(figsize=(5, 5))
f.suptitle('Wine Quality - Alcohol Content', fontsize=12)
# Draw on the axes created above (it is also the current axes).
sns.boxplot(data=white_wine, x="quality", y="alcohol", ax=ax)
ax.set_xlabel("Wine Quality", fontsize=12)
ax.set_ylabel("Wine Alcohol %", fontsize=12)

## Line plot

In [None]:
# Line plot of alcohol against the quality column, on a 10x10-inch figure.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
f.suptitle('Wine Quality - Alcohol Content', fontsize=12)
# Draw on the axes created above (it is also the current axes).
sns.lineplot(data=white_wine, x="quality", y="alcohol", ax=ax)
ax.set_xlabel("Wine Quality", fontsize=12)
ax.set_ylabel("Wine Alcohol %", fontsize=12)

## Pair plot

In [None]:
# Pairwise plots restricted to two columns; markers get a thin black outline.
columns = ['alcohol', 'fixed acidity']
pp = sns.pairplot(
    white_wine.loc[:, columns],
    height=2,
    aspect=2,
    plot_kws={"edgecolor": "k", "linewidth": 0.5},
)

## Heatmap of correlations

In [None]:
# Pairwise correlation between the columns (Pearson, which is the pandas default).
corr = white_wine.corr(method='pearson')
corr

In [None]:
# Annotated heatmap of the correlation matrix, with values rounded to two decimals.
hm = sns.heatmap(corr.round(2), linewidths=.05, annot=True)

# Wordcloud

Documentation: https://github.com/amueller/word_cloud

* Install with `pip install wordcloud`

In [None]:
!pip install wordcloud

In [None]:
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import re
from wordcloud import WordCloud, STOPWORDS
%matplotlib inline

In [None]:
# Address of the article that is downloaded and turned into a word cloud.
url = (
    "https://www.spiegel.de/international/germany/"
    "what-next-attention-slowly-turns-to-the-mother-of-all-coronavirus-questions-"
    "a-853a559e-d454-41e2-b8f2-e8f2c8d5ca18"
)

In [None]:
# Request the website.
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() turns HTTP error responses (4xx/5xx) into an
# exception here instead of letting an error page be parsed as the article.
req = requests.get(url, timeout=30)
req.raise_for_status()

In [None]:
# Create a Soup object from the downloaded HTML.
# Name the parser explicitly: 'html' is only a feature hint, so Beautiful Soup
# would pick whichever HTML parser is installed (and warn that it guessed),
# which can change the parse tree between environments.
website = BeautifulSoup(req.text, 'html.parser')

In [None]:
# Select every <div> whose class attribute contains both "RichText" and
# "word-wrap" (two lookaheads, so the order of the class names is irrelevant).
regex = re.compile('(?=.*RichText.*)(?=.*word-wrap.*)')
# find_all is the current name of the legacy findAll alias.
textelements = website.find_all('div', {'class': regex})
# Collect the text of every <p> inside those containers. Paragraphs are joined
# with a space so the last word of one paragraph does not fuse with the first
# word of the next; str.join also avoids rebuilding the string on each step.
wordstring = " ".join(
    p.text
    for text in textelements
    for p in text.find_all("p")
)

In [None]:
# Inspect the paragraph text collected from the article.
wordstring

In [None]:
# Build a word cloud from the scraped text: white background, at most 30
# words, with the wordcloud package's STOPWORDS set filtered out.
# random_state pins the otherwise random layout and colouring so that
# re-running the notebook reproduces the same image.
wordcloud = WordCloud(
    stopwords=STOPWORDS,
    background_color='white',
    max_words=30,
    random_state=42,
).generate(wordstring)

In [None]:
# Render the word cloud on its own 10x10-inch figure with the axes hidden.
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# axes explicitly. No plt.clf() is needed: subplots() already starts a fresh
# figure, and calling clf() with no figure open only creates an extra empty one.
f, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wordcloud)
ax.axis('off')
plt.show()

In [None]:
# Build a word cloud from the scraped text: white background, at most 30
# words, with the wordcloud package's STOPWORDS set filtered out.
# random_state pins the otherwise random layout and colouring so that
# re-running the notebook reproduces the same image.
wordcloud = WordCloud(
    stopwords=STOPWORDS,
    background_color='white',
    max_words=30,
    random_state=42,
).generate(wordstring)

In [None]:
# Render the word cloud on its own 10x10-inch figure with the axes hidden.
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# axes explicitly. No plt.clf() is needed: subplots() already starts a fresh
# figure, and calling clf() with no figure open only creates an extra empty one.
f, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wordcloud)
ax.axis('off')
plt.show()