# Import libraries

In [null]:
!pip install sweetviz


In [null]:
import pandas as pd
import psycopg2
import numpy as np
import matplotlib.pyplot     as plt
import matplotlib.patches    as mpatches
import seaborn               as sns
import sweetviz as sv
import sklearn.metrics       as Metrics
from google.colab import drive
from pandas_profiling import ProfileReport
%matplotlib inline

# Read file

In [null]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): path_dir in the following cell points to '/mnt/f/...', which is
# not under /content/drive — confirm which environment this notebook targets.
drive.mount('/content/drive')


In [null]:
# Input location: directory, workbook file name and sheet to analyse.
# path_dir must keep its trailing slash: other cells build paths by plain
# string concatenation (f'{path_dir}{filename}').
# NOTE(review): this is an absolute, machine-specific path.
path_dir = "/mnt/f/PythonFinanceProjects/"
filename = "trending_top_stocks.xlsx"
sheet = "trending_today"


In [null]:
# Read the configured sheet of the workbook; row 0 of the sheet supplies the
# column names.
workbook_path = f'{path_dir}{filename}'
df = pd.read_excel(workbook_path, sheet_name=sheet, header=0)

In [null]:
# Preview only the first rows instead of rendering the whole DataFrame.
df.head()

# Normalize column names

In [null]:
def remove_accents_cols(df_cols):
    """Return the labels reduced to plain ASCII.

    A lowercase 'ñ' is first spelled out as 'ni'. The labels are then
    NFKD-normalized and every code point that cannot be encoded as ASCII
    is dropped.

    Parameters
    ----------
    df_cols
        Object exposing the pandas ``.str`` accessor (this notebook passes
        ``df.columns``).

    Returns
    -------
    The same kind of container holding the ASCII-only labels.
    """
    spelled_out = df_cols.str.replace('ñ', 'ni')
    decomposed = spelled_out.str.normalize('NFKD')
    # Encoding with errors='ignore' silently discards the non-ASCII leftovers
    # (e.g. the combining marks produced by the NFKD step).
    ascii_bytes = decomposed.str.encode('ascii', errors='ignore')
    return ascii_bytes.str.decode('utf-8')
def remove_special_chars(df_cols):
    """Replace each of the characters ``$ @ & / . : -`` with one space.

    Parameters
    ----------
    df_cols
        Object exposing the pandas ``.str`` accessor (this notebook passes
        ``df.columns``).

    Returns
    -------
    The labels with every listed character turned into a space; all other
    characters are left untouched.
    """
    special_chars_pattern = r'[$@&/.:-]'
    return df_cols.str.replace(special_chars_pattern, ' ', regex=True)
def regular_camel_case(snake_str):
    """Convert a snake_case string to camelCase.

    The text before the first underscore is kept exactly as given; every
    following underscore-separated piece is passed through ``str.title`` and
    appended without a separator.

    Parameters
    ----------
    snake_str : str
        Text using '_' as the word separator.

    Returns
    -------
    str
        The camelCase form, e.g. 'company_name' -> 'companyName'.
    """
    head, *tail = snake_str.split('_')
    return head + ''.join(map(str.title, tail))
def regular_snake_case(df_cols):
    """Convert the labels to lowercase snake_case.

    Steps, in order: lowercase; spell 'ñ' as 'ni'; turn '/' and '.' into
    spaces; trim the ends; collapse whitespace runs to one space; turn the
    remaining spaces into underscores.

    Parameters
    ----------
    df_cols
        Object exposing the pandas ``.str`` accessor (this notebook passes
        ``df.columns``).

    Returns
    -------
    The same kind of container holding the snake_case labels.
    """
    # Lowercase first so that an uppercase 'Ñ' is also caught by the 'ñ' rule.
    cols = df_cols.str.lower().str.replace('ñ', 'ni', regex=False)
    # regex=False keeps '/' and '.' literal on every pandas version
    # ('.' would otherwise be a regex wildcard).
    cols = (
        cols.str.replace('/', ' ', regex=False)
        .str.replace('.', ' ', regex=False)
        .str.strip()
    )
    cols = cols.str.replace(r'\s+', ' ', regex=True)
    cols = cols.str.replace(' ', '_', regex=False)
    return cols

In [null]:
# Clean the header labels step by step: strip accents, blank out special
# characters, then convert to snake_case. Each step reads and reassigns
# df.columns.
for clean_step in (remove_accents_cols, remove_special_chars, regular_snake_case):
    df.columns = clean_step(df.columns)

In [null]:
# Preview the first rows to check the normalized column names without
# rendering the whole DataFrame.
df.head()

# General stats cells

In [null]:
# Summary statistics for the columns pandas selects by default in describe().
df.describe()

In [null]:
# dtype of every column.
df.dtypes


In [null]:
# Concise overview printed by pandas: index, columns, non-null counts, dtypes
# and memory usage.
df.info()

In [null]:
# Number of columns per dtype.
dtype_counts = df.dtypes.value_counts()
print('recuento de columnas por tipo: ', dtype_counts)

# Missing values per column (isna().sum() gives one count per column, not a
# single grand total).
null_counts = df.isna().sum()
print('sumatoria de valores nulos en el dataframe: ', null_counts)

# General analysis by columns

## no
type: int64

In [null]:
# Force 'no' to a numeric dtype; entries that cannot be parsed become NaN
# (errors='coerce').
no_numeric = pd.to_numeric(df['no'], errors='coerce')
df['no'] = no_numeric

In [null]:
# Arithmetic mean of 'no'.
no_mean = df['no'].mean()
print('la media es: ', no_mean)

In [null]:
# Mode of 'no'. Series.mode() returns a Series (there can be several modes),
# so the printed value includes its index and dtype.
no_mode = df['no'].mode()
print('la moda es: ', no_mode)

In [null]:
# Median of 'no'.
no_median = df['no'].median()
print('la mediana es: ', no_median)

In [null]:
# Smallest value of 'no'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
no_min = df['no'].min()
print('el valor mínimo de no es: : ', no_min)

In [null]:
# Largest value of 'no'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
no_max = df['no'].max()
print('el valor máximo de no es: : ', no_max)

In [null]:
# Range of 'no': largest value minus smallest value.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
no_col = df['no']
print('el rango de no es: : ', no_col.max() - no_col.min())

In [null]:
# Standard deviation of 'no' (pandas' default Series.std()).
# NOTE(review): the label spells "éstandar" and ends in a doubled colon;
# text left as is.
no_std = df['no'].std()
print('la desviación éstandar de no es: : ', no_std)

In [null]:
# Histogram of 'no' with a dashed red vertical line at the mean.
no_hist_ax = sns.histplot(data=df, x='no')
no_hist_ax.axvline(x=df['no'].mean(), color='red', linestyle='dashed', linewidth=2)

## symbol
type: object

In [null]:
# Cast 'symbol' to text and drop only a trailing '.0' — presumably the suffix
# left behind when an integer-valued float is rendered as a string.
# The pattern is anchored and escaped on purpose: an unanchored '.0' would
# also delete matches inside a value ('1.05' -> '15') and, on pandas < 2.0
# where regex=True was the default, '.' would match any character.
# NOTE(review): astype(str) turns missing values into the text 'nan'.
df['symbol'] = (
    df['symbol']
    .astype(str)
    .str.replace(r'\.0$', '', regex=True)
)
df['symbol'].unique()

In [null]:
# Frequency of each distinct 'symbol' value (value_counts sorts by count,
# most frequent first, by default).
df['symbol'].value_counts()

In [null]:
# Pie chart of the 15 most frequent 'symbol' values.
fig, ax = plt.subplots(figsize=(10, 5))
df['symbol'].value_counts().iloc[:15].plot.pie(ax=ax)

In [null]:
# Horizontal bar chart of the 15 most frequent 'symbol' values.
fig, ax = plt.subplots(figsize=(30, 5))
df['symbol'].value_counts().iloc[:15].plot.barh(ax=ax)

## company_name
type: object

In [null]:
# Cast 'company_name' to text and drop only a trailing '.0' — presumably the
# suffix left behind when an integer-valued float is rendered as a string.
# The pattern is anchored and escaped on purpose: an unanchored '.0' would
# also delete matches inside a value ('Web 2.0 Inc' -> 'Web 2 Inc') and, on
# pandas < 2.0 where regex=True was the default, '.' would match any character.
# NOTE(review): astype(str) turns missing values into the text 'nan'.
df['company_name'] = (
    df['company_name']
    .astype(str)
    .str.replace(r'\.0$', '', regex=True)
)
df['company_name'].unique()

In [null]:
# Frequency of each distinct 'company_name' value (value_counts sorts by
# count, most frequent first, by default).
df['company_name'].value_counts()

In [null]:
# Pie chart of the 15 most frequent 'company_name' values.
fig, ax = plt.subplots(figsize=(10, 5))
df['company_name'].value_counts().iloc[:15].plot.pie(ax=ax)

In [null]:
# Horizontal bar chart of the 15 most frequent 'company_name' values.
fig, ax = plt.subplots(figsize=(30, 5))
df['company_name'].value_counts().iloc[:15].plot.barh(ax=ax)

## views
type: int64

In [null]:
# Force 'views' to a numeric dtype; entries that cannot be parsed become NaN
# (errors='coerce').
views_numeric = pd.to_numeric(df['views'], errors='coerce')
df['views'] = views_numeric

In [null]:
# Arithmetic mean of 'views'.
views_mean = df['views'].mean()
print('la media es: ', views_mean)

In [null]:
# Mode of 'views'. Series.mode() returns a Series (there can be several
# modes), so the printed value includes its index and dtype.
views_mode = df['views'].mode()
print('la moda es: ', views_mode)

In [null]:
# Median of 'views'.
views_median = df['views'].median()
print('la mediana es: ', views_median)

In [null]:
# Smallest value of 'views'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
views_min = df['views'].min()
print('el valor mínimo de views es: : ', views_min)

In [null]:
# Largest value of 'views'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
views_max = df['views'].max()
print('el valor máximo de views es: : ', views_max)

In [null]:
# Range of 'views': largest value minus smallest value.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
views_col = df['views']
print('el rango de views es: : ', views_col.max() - views_col.min())

In [null]:
# Standard deviation of 'views' (pandas' default Series.std()).
# NOTE(review): the label spells "éstandar" and ends in a doubled colon;
# text left as is.
views_std = df['views'].std()
print('la desviación éstandar de views es: : ', views_std)

In [null]:
# Histogram of 'views' with a dashed red vertical line at the mean.
views_hist_ax = sns.histplot(data=df, x='views')
views_hist_ax.axvline(x=df['views'].mean(), color='red', linestyle='dashed', linewidth=2)

## market_cap
type: object

In [null]:
# Cast 'market_cap' to text and drop only a trailing '.0' — presumably the
# suffix left behind when an integer-valued float is rendered as a string.
# The pattern is anchored and escaped on purpose: an unanchored '.0' would
# also delete matches inside a value ('1.05T' -> '15T') and, on pandas < 2.0
# where regex=True was the default, '.' would match any character.
# NOTE(review): astype(str) turns missing values into the text 'nan'.
df['market_cap'] = (
    df['market_cap']
    .astype(str)
    .str.replace(r'\.0$', '', regex=True)
)
df['market_cap'].unique()

In [null]:
# Frequency of each distinct 'market_cap' value (value_counts sorts by count,
# most frequent first, by default).
df['market_cap'].value_counts()

In [null]:
# Pie chart of the 15 most frequent 'market_cap' values.
fig, ax = plt.subplots(figsize=(10, 5))
df['market_cap'].value_counts().iloc[:15].plot.pie(ax=ax)

In [null]:
# Horizontal bar chart of the 15 most frequent 'market_cap' values.
fig, ax = plt.subplots(figsize=(30, 5))
df['market_cap'].value_counts().iloc[:15].plot.barh(ax=ax)

## %_change
type: object

In [null]:
# Cast '%_change' to text and drop only a trailing '.0' — presumably the
# suffix left behind when an integer-valued float is rendered as a string.
# The pattern is anchored and escaped on purpose: an unanchored '.0' would
# also delete matches inside a value ('10.02' -> '102') and, on pandas < 2.0
# where regex=True was the default, '.' would match any character.
# NOTE(review): astype(str) turns missing values into the text 'nan'.
df['%_change'] = (
    df['%_change']
    .astype(str)
    .str.replace(r'\.0$', '', regex=True)
)
df['%_change'].unique()

In [null]:
# Frequency of each distinct '%_change' value (value_counts sorts by count,
# most frequent first, by default).
df['%_change'].value_counts()

In [null]:
# Pie chart of the 15 most frequent '%_change' values.
fig, ax = plt.subplots(figsize=(10, 5))
df['%_change'].value_counts().iloc[:15].plot.pie(ax=ax)

In [null]:
# Horizontal bar chart of the 15 most frequent '%_change' values.
fig, ax = plt.subplots(figsize=(30, 5))
df['%_change'].value_counts().iloc[:15].plot.barh(ax=ax)

## volume
type: object

In [null]:
# Force 'volume' to a numeric dtype; entries that cannot be parsed become NaN
# (errors='coerce').
# NOTE(review): the section header lists this column as object — check how
# many values end up as NaN after the conversion.
volume_numeric = pd.to_numeric(df['volume'], errors='coerce')
df['volume'] = volume_numeric

In [null]:
# Arithmetic mean of 'volume'.
volume_mean = df['volume'].mean()
print('la media es: ', volume_mean)

In [null]:
# Mode of 'volume'. Series.mode() returns a Series (there can be several
# modes), so the printed value includes its index and dtype.
volume_mode = df['volume'].mode()
print('la moda es: ', volume_mode)

In [null]:
# Median of 'volume'.
volume_median = df['volume'].median()
print('la mediana es: ', volume_median)

In [null]:
# Smallest value of 'volume'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
volume_min = df['volume'].min()
print('el valor mínimo de volume es: : ', volume_min)

In [null]:
# Largest value of 'volume'.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
volume_max = df['volume'].max()
print('el valor máximo de volume es: : ', volume_max)

In [null]:
# Range of 'volume': largest value minus smallest value.
# NOTE(review): the label ends in a doubled colon ("es: : "); text left as is.
volume_col = df['volume']
print('el rango de volume es: : ', volume_col.max() - volume_col.min())

In [null]:
# Standard deviation of 'volume' (pandas' default Series.std()).
# NOTE(review): the label spells "éstandar" and ends in a doubled colon;
# text left as is.
volume_std = df['volume'].std()
print('la desviación éstandar de volume es: : ', volume_std)

In [null]:
# Histogram of 'volume' with a dashed red vertical line at the mean.
volume_hist_ax = sns.histplot(data=df, x='volume')
volume_hist_ax.axvline(x=df['volume'].mean(), color='red', linestyle='dashed', linewidth=2)

# Sweetviz report

In [null]:
# Build the Sweetviz profile of the DataFrame and write the HTML report into
# path_dir. The report name is derived from `filename` and `sheet` so it
# follows the input configuration instead of repeating it by hand; with the
# configured values it resolves to
# 'sw_report_trending_top_stocks.xlsx_trending_today.html'.
sweet_report = sv.analyze(df)
sweet_report.show_html(f'{path_dir}sw_report_{filename}_{sheet}.html')