In [7]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [8]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import json
import jsonschema
from jsonschema.exceptions import ValidationError
import os
import pathlib
import os.path
import xarray as xr
import matplotlib.pyplot as plt
import requests
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
import time
import os
import numpy as np
from IPython.display import display

### **Next Steps** 

In [9]:
import os
import json
import requests
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
class Visualization:
    """
    Helpers for sharing image files through a Telegram bot or a Slack app.

    All methods are static: none of them reads instance or class state, so they
    behave the same whether called as ``Visualization.method(...)`` or on an
    instance. Credentials can be passed explicitly or stored once in a JSON file
    (plain text, in the working directory by default) and picked up automatically.
    """

    # Seconds to wait on the Telegram HTTP request; without a timeout the call can block indefinitely.
    _REQUEST_TIMEOUT = 30

    @staticmethod
    def _save_credentials(credentials: dict, filename: str, service: str) -> None:
        """Write `credentials` to `filename` as JSON, reporting (not raising) write failures."""
        try:
            with open(filename, "w") as outfile:
                json.dump(credentials, outfile)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error occurred while saving {service} credentials:", str(e))

    @staticmethod
    def _resolve_credentials(provided: dict, filename: str, save_function: str) -> dict:
        """Fill the `None` entries of `provided` from the JSON file `filename`, then require both values."""
        first, second = provided
        resolved = dict(provided)
        if any(value is None for value in resolved.values()):
            if os.path.exists(filename):
                try:
                    with open(filename, 'r') as openfile:
                        stored = json.load(openfile)
                except ValueError:
                    # Corrupt / non-JSON file: fall through to the hint below.
                    stored = None
                if isinstance(stored, dict):
                    for key, value in resolved.items():
                        if value is None:
                            resolved[key] = stored.get(key)
                else:
                    print(f"Please use the function '{save_function}'")
            elif all(value is None for value in resolved.values()):
                raise ValueError(
                    f"Please provide the {first} and the {second} or use the function '{save_function}'."
                )
        if any(value is None for value in resolved.values()):
            raise ValueError(f"{first} and {second} are required parameters")
        return resolved

    @staticmethod
    def save_telegram_credentials(bot_token: str = None, chat_id: str = None,
                                  filename: str = "telegram_credentials.json"):
        """
        Save the Telegram credentials to a JSON file.

        The file is written as plain-text JSON with the keys "chat_id" and "bot_token".
        Write failures are printed instead of raised.

        Args:
            bot_token (str, optional): The bot token. If not provided, the user will be prompted to enter it (input is hidden).
            chat_id (str, optional): The chat ID. If not provided, the user will be prompted to enter it.
            filename (str, optional): The name of the JSON file to save the credentials. Defaults to "telegram_credentials.json".

        Returns:
            None
        """
        # Local import: only needed for the interactive prompt; keeps the token off the screen.
        from getpass import getpass

        bot_token = bot_token or getpass("Insert the bot_token: ")
        chat_id = chat_id or input("Insert the chat id: ")
        Visualization._save_credentials(
            {"chat_id": chat_id, "bot_token": bot_token}, filename, "telegram"
        )

    @staticmethod
    def send_images_via_telegram(file_path: str, chat_id: str = None, bot_token: str = None,
                                 caption: str = "This is a caption",
                                 credentials_file: str = "telegram_credentials.json"):
        """
        Sends an image via Telegram using the provided file path, chat ID, bot token, and caption.

        Request failures and non-200 responses are printed, not raised.

        Args:
            file_path (str): The path to the image file.
            chat_id (str, optional): The ID of the chat to send the image to. If not provided, it is read from `credentials_file`. Defaults to None.
            bot_token (str, optional): The token of the Telegram bot. If not provided, it is read from `credentials_file`. Defaults to None.
            caption (str, optional): The caption for the image. Defaults to "This is a caption".
            credentials_file (str, optional): JSON file written by `save_telegram_credentials`. Defaults to "telegram_credentials.json".

        Raises:
            ValueError: If neither chat_id nor bot_token is provided and `credentials_file` does not exist.
            ValueError: If chat_id or bot_token is still missing after consulting `credentials_file`.
            OSError: If `file_path` cannot be opened.

        Returns:
            None
        """
        credentials = Visualization._resolve_credentials(
            {"chat_id": chat_id, "bot_token": bot_token},
            credentials_file,
            "save_telegram_credentials",
        )
        chat_id = credentials["chat_id"]
        bot_token = credentials["bot_token"]

        base_url = f"https://api.telegram.org/bot{bot_token}/sendPhoto"
        parameters = {"chat_id": chat_id, "caption": caption}
        with open(file_path, 'rb') as my_file:
            try:
                resp = requests.post(
                    base_url,
                    data=parameters,
                    files={"photo": my_file},
                    timeout=Visualization._REQUEST_TIMEOUT,
                )
            except requests.exceptions.RequestException as e:
                print("An error occurred during the request:", str(e))
                return

        if resp.status_code == 200:
            print("The photo was sent.")
            return

        try:
            resp_json = resp.json()
        except ValueError as e:
            # Error body was not JSON (e.g. an HTML page from a proxy).
            print("An error occurred during the request:", str(e))
            return
        if not isinstance(resp_json, dict):
            resp_json = {"ok": False, "response": resp_json}
        # pop() with a default: a body without "ok" must not raise KeyError.
        print("Sent", "-", resp_json.pop("ok", None))
        for key, values in resp_json.items():
            print(key.capitalize(), "-", values)

    @staticmethod
    def help_telegram_bot():
        """
        Provides information on how to use a Telegram bot.
        """
        print('''
        1. How to create a bot: https://www.directual.com/lesson-library/how-to-create-a-telegram-bot
        2. Adding the bot to a group: https://botifi.me/en/help/telegram-adding-bot-to-channel-or-group/
        3. Getting the bot_token: https://botifi.me/en/help/telegram-existed-bot/
        4. Getting the chat_id of a group: https://www.wikihow.com/Know-Chat-ID-on-Telegram-on-Android
        5. Possible errors: https://core.telegram.org/api/errors
        ''')

    @staticmethod
    def save_slack_credentials(channel_id: str = None, slack_token: str = None,
                               filename: str = "slack_credentials.json"):
        """
        Saves Slack credentials (channel ID and token) to a JSON file.

        The file is written as plain-text JSON with the keys "channel_id" and "slack_token".
        Write failures are printed instead of raised.

        Args:
            channel_id (str, optional): The ID of the Slack channel. If not provided, the user will be prompted to enter it.
            slack_token (str, optional): The Slack token. If not provided, the user will be prompted to enter it (input is hidden).
            filename (str, optional): The name of the JSON file to save the credentials. Defaults to "slack_credentials.json".

        Returns:
            None: The function does not return any value.
        """
        # Local import: only needed for the interactive prompt; keeps the token off the screen.
        from getpass import getpass

        slack_token = slack_token or getpass("Insert the slack_token: ")
        channel_id = channel_id or input("Insert the channel_id: ")
        Visualization._save_credentials(
            {"channel_id": channel_id, "slack_token": slack_token}, filename, "slack"
        )

    @staticmethod
    def send_images_via_slack(file_path: str, channel_id: str = None, slack_token: str = None,
                              caption: str = "This is a caption",
                              credentials_file: str = "slack_credentials.json"):
        """
        Sends an image file to a specified Slack channel using the Slack API.

        Slack API errors are printed, not raised.

        Args:
            file_path (str): The path to the image file to be sent.
            channel_id (str, optional): The ID of the Slack channel to send the image to. If not provided, it is read from `credentials_file`.
            slack_token (str, optional): The Slack API token. If not provided, it is read from `credentials_file`.
            caption (str, optional): The title shown with the image in Slack.
            credentials_file (str, optional): JSON file written by `save_slack_credentials`. Defaults to "slack_credentials.json".

        Raises:
            ValueError: If neither `channel_id` nor `slack_token` is provided and `credentials_file` does not exist.
            ValueError: If either `channel_id` or `slack_token` is still missing after consulting `credentials_file`.

        Returns:
            None
        """
        credentials = Visualization._resolve_credentials(
            {"channel_id": channel_id, "slack_token": slack_token},
            credentials_file,
            "save_slack_credentials",
        )
        channel_id = credentials["channel_id"]
        slack_token = credentials["slack_token"]

        client = WebClient(token=slack_token)
        try:
            # Prefer the v2 upload flow; the legacy files.upload method is deprecated by Slack.
            # Older slack_sdk releases without files_upload_v2 keep using the legacy call.
            if hasattr(client, "files_upload_v2"):
                response = client.files_upload_v2(
                    channel=channel_id,
                    file=file_path,
                    title=caption
                )
            else:
                response = client.files_upload(
                    channels=channel_id,
                    file=file_path,
                    title=caption
                )
            if response["ok"]:
                print("The photo was sent.")
            else:
                # NOTE(review): SlackResponse appears to expose its payload via `.data` rather than
                # a dict-style `.items()` — confirm against the installed slack_sdk version.
                payload = getattr(response, "data", None)
                if not isinstance(payload, dict):
                    payload = {}
                for key, value in payload.items():
                    print(f"{key.capitalize()}: {value}")
        except SlackApiError as e:
            print(f"Error uploading file: {e.response['error']}")

    @staticmethod
    def help_slack_bot():
        """
        Provides a list of resources to help users create and configure a Slack bot.
        """
        print('''
            1. Creating a slack_bot (read the first paragraph): https://medium.com/applied-data-science/how-to-build-you-own-slack-bot-714283fd16e5
            2. Getting the channel_id (read method 1): https://www.process.st/how-to/find-slack-channel-id/ 
            ''')

# Suntzu Tests

In [1]:
import suntzu as snt
# Load the Titanic example file, then call the cleaning helper on the class itself,
# passing the DataFrame explicitly (presumably it capitalizes the strings in "Sex").
df = snt.read_file("examples/titanic.csv")
sex_capitalized = snt.Cleaning.capitalize_rows_string(df, ["Sex"])
sex_capitalized.head()



Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",Male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",Female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",Male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",Male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",Female,22.0,1,1,3101298,12.2875,,S


In [3]:
# Same helper, called through the object returned by start_Cleaning(df),
# so the DataFrame is no longer passed as an argument.
cleaning = snt.start_Cleaning(df)
capitalized_via_wrapper = cleaning.capitalize_rows_string(["Sex"])
capitalized_via_wrapper.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",Male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",Female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",Male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",Male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",Female,22.0,1,1,3101298,12.2875,,S


In [4]:
# Build the optimization helper from the `cleaning` object and print one
# "best dtype" suggestion per column ("teste" is Portuguese for "test").
teste = snt.start_optimization(cleaning)
teste.get_best_dtypes()

The best dtype for PassengerId is int16
The best dtype for Pclass is int8
The best dtype for Name is category
The best dtype for Sex is category
But consider changing it to bool, has you have 2 unique values so you can map the numbers to be True or False
The best dtype for Age is float16
The best dtype for SibSp is int8
The best dtype for Parch is int8
The best dtype for Ticket is category
The best dtype for Fare is float16
The best dtype for Cabin is category
The best dtype for Embarked is category


# Tests

Packages

In [104]:
from matplotlib.container import BarContainer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools

Sample Dataframes

In [81]:
# General-purpose sample frame (time, sales, profit, category, age).
# A locally seeded generator makes the cell idempotent: every run, and every
# Restart & Run All, produces exactly the same data.
rng_sample = np.random.default_rng(81)  # arbitrary fixed seed
N_SAMPLE_ROWS = 100

data = {
    'Tempo': pd.date_range('2024-01-01', periods=N_SAMPLE_ROWS),
    'Vendas': rng_sample.integers(0, 100, size=N_SAMPLE_ROWS),    # 0..99, upper bound exclusive
    'Lucro': rng_sample.uniform(0, 1000, size=N_SAMPLE_ROWS),
    'Categoria': rng_sample.choice(['A', 'B', 'C'], size=N_SAMPLE_ROWS),
    'Idade': rng_sample.integers(18, 70, size=N_SAMPLE_ROWS),     # 18..69, upper bound exclusive
}

df = pd.DataFrame(data)

In [82]:
# Sample DataFrame for Line Plot: a 100-day random walk.
# Seeded locally so re-running the cell always yields the same series.
rng_line = np.random.default_rng(82)  # arbitrary fixed seed
df_line = pd.DataFrame({
    'Date': pd.date_range(start='1/1/2023', periods=100),
    'Value': rng_line.standard_normal(100).cumsum()
})

df_line.head()

Unnamed: 0,Date,Value
0,2023-01-01,0.428114
1,2023-01-02,0.637351
2,2023-01-03,0.000632
3,2023-01-04,2.122428
4,2023-01-05,1.166033


In [83]:
# Bar-plot fixture: seven categories A-G; note that E and F share the value 11.
bar_categories = list("ABCDEFG")
bar_values = [23, 45, 56, 111, 11, 11, 78]
df_bar = pd.DataFrame({"Category": bar_categories, "Value": bar_values})

df_bar.head(10)

Unnamed: 0,Category,Value
0,A,23
1,B,45
2,C,56
3,D,111
4,E,11
5,F,11
6,G,78


In [84]:
# Sample DataFrame for Box Plot: 200 standard-normal values spread over four categories.
# Seeded locally so re-running the cell always yields the same rows.
rng_box = np.random.default_rng(84)  # arbitrary fixed seed
df_box = pd.DataFrame({
    'Category': rng_box.choice(['A', 'B', 'C', 'D'], size=200),
    'Value': rng_box.standard_normal(200)
})

df_box.head()


Unnamed: 0,Category,Value
0,B,0.379678
1,A,0.873341
2,B,0.186129
3,D,-0.142569
4,D,-0.333876


In [85]:
# Sample DataFrame for KDE Plot: 1000 standard-normal draws.
# Seeded locally so re-running the cell always yields the same sample.
rng_kde = np.random.default_rng(85)  # arbitrary fixed seed
df_kde = pd.DataFrame({
    'Value': rng_kde.standard_normal(1000)
})

df_kde.head()

Unnamed: 0,Value
0,-0.232699
1,-0.306809
2,-0.878051
3,1.394822
4,-1.010992


In [86]:
# Map fixture: one (city, latitude, longitude, value) record per row,
# transposed into columns before building the frame.
geo_records = [
    ('New York', 40.7128, -74.0060, 100),
    ('Los Angeles', 34.0522, -118.2437, 200),
    ('Chicago', 41.8781, -87.6298, 300),
    ('Houston', 29.7604, -95.3698, 400),
    ('Phoenix', 33.4484, -112.0740, 500),
]
geo_cities, geo_lats, geo_lons, geo_values = (list(column) for column in zip(*geo_records))
df_geo = pd.DataFrame({
    'City': geo_cities,
    'Latitude': geo_lats,
    'Longitude': geo_lons,
    'Value': geo_values,
})

df_geo.head()

Unnamed: 0,City,Latitude,Longitude,Value
0,New York,40.7128,-74.006,100
1,Los Angeles,34.0522,-118.2437,200
2,Chicago,41.8781,-87.6298,300
3,Houston,29.7604,-95.3698,400
4,Phoenix,33.4484,-112.074,500


In [87]:
# Sample DataFrame for Histogram: 1000 standard-normal draws.
# Seeded locally so re-running the cell always yields the same sample.
rng_histogram = np.random.default_rng(87)  # arbitrary fixed seed
df_histogram = pd.DataFrame({
    'Value': rng_histogram.standard_normal(1000)
})

df_histogram.head()

Unnamed: 0,Value
0,-0.326478
1,0.49806
2,0.739663
3,1.257733
4,1.704362


In [88]:
# Sample DataFrame for Correlation Heatmap: four independent standard-normal columns.
# Seeded locally so re-running the cell always yields the same values.
rng_heatmap = np.random.default_rng(88)  # arbitrary fixed seed
df_heatmap = pd.DataFrame({
    'A': rng_heatmap.standard_normal(100),
    'B': rng_heatmap.standard_normal(100),
    'C': rng_heatmap.standard_normal(100),
    'D': rng_heatmap.standard_normal(100)
})

df_heatmap.head()

Unnamed: 0,A,B,C,D
0,-1.852159,0.613771,1.449307,0.558466
1,-1.960312,1.102955,-1.311373,-0.356735
2,-0.885425,0.594867,1.296173,2.118624
3,-0.075529,-0.439919,0.742398,0.801854
4,0.338507,-0.108037,-1.486185,1.09412


In [89]:
# Sample DataFrame for Multi-line Plot: three 100-day random walks on a shared date axis.
# Seeded locally so re-running the cell always yields the same series.
rng_multiline = np.random.default_rng(89)  # arbitrary fixed seed
df_multiline = pd.DataFrame({
    'Date': pd.date_range(start='1/1/2023', periods=100),
    'Value1': rng_multiline.standard_normal(100).cumsum(),
    'Value2': rng_multiline.standard_normal(100).cumsum(),
    'Value3': rng_multiline.standard_normal(100).cumsum()
})

df_multiline.head()

Unnamed: 0,Date,Value1,Value2,Value3
0,2023-01-01,0.781134,-0.808166,0.705349
1,2023-01-02,0.864594,-1.068959,0.721484
2,2023-01-03,-0.703344,-1.519424,-0.508626
3,2023-01-04,-1.174385,-0.906237,-0.5314
4,2023-01-05,-1.126011,-0.831715,-1.281753


lineplot - Done

bar plot - Done

multilineplot - Done

correlation heatmap

box plot

histogram

kde plot

cross tabulation

geographical map

Functions

In [115]:
# increase_graph_size(15, 6)
# set_grid(grid_border=[False, False, True, True], minorgridlines=False)
# bars = barplot(df_bar, "Category", "Value", grid=True)
# bars = change_bar_colors(bars, ["blue", "yellow"], alpha=0.5)
# bars = highlight_equal_values(bars)
# bars =hightlight_median(bars)
# bars = highlight_max_min_bar(bars)
# bars = show_bar_values(bars)

In [98]:
# increase_graph_size(15, 10)
# set_grid(grid_border=False)
# set_title_settings(size=24)
# set_labels_settings(labelsize=20)
# set_marker_settings(outlinewidth=5)

In [99]:
# increase_graph_size(15, 6)
# set_labels_settings()
# set_title_settings()
# set_grid()
# # reset_settings()
# set_line_settings(marker="o", markerfacecolor="black", markeredgecolor="white", markersize=7, linestyle="--")
# teste = lineplot(df_line, "Date", "Value")

In [114]:
# multilineplot(df_multiline, "Date", ["Value1", "Value2", "Value3"], colors=["green", "red", "blue"])