# Exploratory Analysis  

## Imports

In [145]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from IPython.display import Markdown, display
from sqlalchemy import create_engine
from tabulate import tabulate
import seaborn as sns
import matplotlib.pyplot as plt

## Configuring Seaborn

In [146]:
sns.set_palette(['#2B3078', '#535BC2', '#7D85FF'])

## Fetching the Dataset

In [147]:
load_dotenv()

engine = create_engine(url=os.getenv('DB_URL'))
df = pd.read_sql('abacus_survey', engine)

print(tabulate(df.head(), headers='keys', tablefmt='fancy_grid'))

╒════╤══════╤════════════════════════════════════╤══════════════╤═══════════════════════════════════════════════════════════════════════════════════╤═════════════════════╤══════════════════════════════╤════════════════════════╤════════════════╤═════════════════════════╤══════════════════╤══════════════════════════════════╤══════════════════════════════╤════════════════════╤══════════════════════════╕
│    │   id │ email                              │ age          │ job_responsibilities                                                              │ state               │   abacus_counting_importance │ abacus_counting_time   │ has_problems   │   technology_acceptance │ can_use_mobile   │ is_signal_quality_satisfactory   │ photo_without_interference   │   lighting_quality │ would_use_or_recommend   │
╞════╪══════╪════════════════════════════════════╪══════════════╪═══════════════════════════════════════════════════════════════════════════════════╪═════════════════════╪═════════════════════

## Basic Information

In [None]:
display(Markdown('### Dimensions'))
print(df.shape)

display(Markdown('### Data types'))
print(df.dtypes)

display(Markdown('### Missing data per column'))
print(df.isnull().sum())

### Dimensions

(59, 14)


### Data types

id                                  int64
email                              object
age                                object
job_responsibilities               object
state                              object
abacus_counting_importance         object
abacus_counting_time               object
has_problems                       object
technology_acceptance             float64
can_use_mobile                     object
is_signal_quality_satisfactory     object
photo_without_interference         object
lighting_quality                  float64
would_use_or_recommend             object
dtype: object


### Missing data per column

id                                 0
email                              0
age                                0
job_responsibilities               0
state                              0
abacus_counting_importance         0
abacus_counting_time               0
has_problems                       0
technology_acceptance             30
can_use_mobile                     0
is_signal_quality_satisfactory    22
photo_without_interference        26
lighting_quality                  14
would_use_or_recommend             0
dtype: int64


## Cleaning the dataset

In [150]:
df.drop(columns=['email'], inplace=True)
df.drop(columns=['id'], inplace=True)

df['has_problems'] = df['has_problems'].map({'Sim':1, 'Não':0})
df['can_use_mobile'] = df['can_use_mobile'].map({'Sim':1, 'Não':0})
df['would_use_or_recommend'] = df['would_use_or_recommend'].map({'Sim':1, 'Não':0})
df['is_signal_quality_satisfactory'] = df['is_signal_quality_satisfactory'].map({'Sim':1, 'Não':0})
df['photo_without_interference'] = df['photo_without_interference'].map({'Sim':1, 'Não':0})

print(tabulate(df.head(), headers='keys', tablefmt='fancy_grid'))

╒════╤══════════════╤═══════════════════════════════════════════════════════════════════════════════════╤═════════════════════╤══════════════════════════════╤════════════════════════╤════════════════╤═════════════════════════╤══════════════════╤══════════════════════════════════╤══════════════════════════════╤════════════════════╤══════════════════════════╕
│    │ age          │ job_responsibilities                                                              │ state               │   abacus_counting_importance │ abacus_counting_time   │   has_problems │   technology_acceptance │   can_use_mobile │   is_signal_quality_satisfactory │   photo_without_interference │   lighting_quality │   would_use_or_recommend │
╞════╪══════════════╪═══════════════════════════════════════════════════════════════════════════════════╪═════════════════════╪══════════════════════════════╪════════════════════════╪════════════════╪═════════════════════════╪══════════════════╪══════════════════════════════════╪