# In [2]: (Jupyter notebook cell marker)
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
from dotenv import load_dotenv


def summary(regs_file="regs_1k.csv", visits_file="visits_1k.csv"):
    """Return descriptive statistics for the registrations and visits samples.

    Args:
        regs_file: Path to the registrations CSV. Defaults to the sample
            file the function originally read, ``"regs_1k.csv"``.
        visits_file: Path to the visits CSV. Defaults to ``"visits_1k.csv"``.

    Returns:
        A tuple ``(summary_regs, summary_visits)`` holding the
        ``DataFrame.describe()`` output for each file.
    """
    summary_regs = pd.read_csv(regs_file).describe()
    summary_visits = pd.read_csv(visits_file).describe()
    return summary_regs, summary_visits


def _fetch_frame(api_url, endpoint, params, timeout=30):
    """Download one API collection and return its JSON payload as a DataFrame."""
    response = requests.get(f"{api_url}/{endpoint}", params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error body as data.
    response.raise_for_status()
    return pd.DataFrame(response.json())


def _build_conversion_table(visits, registrations):
    """Aggregate raw visits and registrations into a per-date, per-platform table."""
    # Drop bot traffic. The explicit copy keeps the column assignment below
    # from writing into a slice of the caller's frame.
    is_bot = visits["user_agent"].str.contains("bot", case=False, na=False)
    human_visits = visits[~is_bot].copy()
    registrations = registrations.copy()

    human_visits["datetime"] = pd.to_datetime(human_visits["datetime"])
    registrations["datetime"] = pd.to_datetime(registrations["datetime"])

    # Keep only the latest record for every visit_id.
    last_visit = (
        human_visits.sort_values(by=["visit_id", "datetime"])
        .groupby("visit_id")
        .last()
        .reset_index()
    )

    # Visits per date and platform.
    last_visit["date_group"] = last_visit["datetime"].dt.date
    df_visit = (
        last_visit.groupby(["platform", "date_group"]).size().reset_index(name="visits")
    )

    # Registrations per date and platform.
    registrations["date_group"] = registrations["datetime"].dt.date
    df_reg = (
        registrations.groupby(["platform", "date_group"])
        .size()
        .reset_index(name="registrations")
    )

    df = pd.merge(df_visit, df_reg, on=["platform", "date_group"], how="outer").fillna(
        0
    )
    # The outer merge can yield rows with 0 visits; report 0 % for them
    # instead of the inf/NaN a plain division would produce.
    df["conversion"] = (100 * df["registrations"] / df["visits"]).where(
        df["visits"] > 0, 0.0
    )
    df = df.sort_values(by=["date_group"])
    return df[["date_group", "platform", "visits", "registrations", "conversion"]]


def conversion():
    """Fetch visits and registrations from the API and compute daily conversion.

    Reads ``API_URL``, ``DATE_BEGIN`` and ``DATE_END`` from the environment
    (after loading ``.env``), downloads both collections for that period,
    drops visits whose user agent contains "bot", keeps the last record per
    ``visit_id`` and counts visits and registrations per date and platform.

    The result is also written to ``./conversion.json``.

    Returns:
        DataFrame with columns ``date_group``, ``platform``, ``visits``,
        ``registrations`` and ``conversion`` (percent), sorted by date.

    Raises:
        ValueError: If any of the required environment variables is unset.
        requests.HTTPError: If the API answers with an error status.
    """
    # Load environment variables from the .env file.
    load_dotenv()

    settings = {
        name: os.getenv(name) for name in ("DATE_BEGIN", "DATE_END", "API_URL")
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        # Without this check the request URL would silently contain "None".
        raise ValueError(
            f"Missing required environment variables: {', '.join(missing)}"
        )

    period = {"begin": settings["DATE_BEGIN"], "end": settings["DATE_END"]}
    visits = _fetch_frame(settings["API_URL"], "visits", period)
    registrations = _fetch_frame(settings["API_URL"], "registrations", period)

    df = _build_conversion_table(visits, registrations)

    # Persist the table as JSON.
    df.to_json("./conversion.json")

    return df


def process_ad_data(
    company_file="ads.csv", conversion_file="conversion.json", output_file="./ads.json"
):
    """Combine daily ad spend with daily visit and registration totals.

    Args:
        company_file: CSV of ad records with ``date``, ``utm_campaign`` and
            ``cost`` columns.
        conversion_file: JSON table with ``date_group`` (epoch milliseconds),
            ``visits`` and ``registrations`` columns.
        output_file: Path the merged table is written to as JSON.

    Returns:
        DataFrame with columns ``date_group``, ``visits``, ``registrations``,
        ``cost`` and ``utm_campaign``, sorted by date. Dates without ad
        records get ``cost`` 0 and ``utm_campaign`` "none".
    """
    # Load ad campaign and conversion data.
    df_company = pd.read_csv(company_file)
    df_conversion = pd.read_json(conversion_file)

    # Reduce ad timestamps to calendar dates.
    df_company["date"] = pd.to_datetime(df_company["date"])
    df_company["date_group"] = df_company["date"].dt.date

    # Fill gaps BEFORE grouping: groupby drops rows whose key is NaN, so
    # filling afterwards would never see the ads without a campaign.
    df_company["utm_campaign"] = df_company["utm_campaign"].fillna("none")
    df_company["cost"] = df_company["cost"].fillna(0)

    # Total cost per date and campaign.
    df_company = df_company.groupby(["date_group", "utm_campaign"], as_index=False)[
        "cost"
    ].sum()

    # Conversion dates are stored as epoch milliseconds.
    df_conversion["date_group"] = pd.to_datetime(
        df_conversion["date_group"], unit="ms"
    ).dt.date

    # Collapse to one row per date first; merging while several rows share a
    # date would repeat each day's ad cost once per such row.
    df_daily = df_conversion.groupby("date_group", as_index=False)[
        ["visits", "registrations"]
    ].sum()

    # Join ad spend with the daily totals.
    df = pd.merge(df_company, df_daily, on=["date_group"], how="outer")
    # Dates without ads get the same "none" label as ads without a campaign,
    # rather than an integer 0 in a text column.
    df["utm_campaign"] = df["utm_campaign"].fillna("none")
    df = df.fillna(0)

    # Fix the column order.
    df = df[["date_group", "visits", "registrations", "cost", "utm_campaign"]]
    df = df.sort_values(by=["date_group"])

    # Persist the result as JSON.
    df.to_json(output_file)

    return df
# Make sure the ./charts output directory exists before any chart is saved.
os.makedirs("./charts", exist_ok=True)

# Load every data source, in the same order as the names on the left.
visits, conversion, ads, registration = map(
    pd.read_json,
    ("visits.json", "conversion.json", "ads.json", "registrations.json"),
)

# date_group is stored as epoch milliseconds; convert it to plain dates.
_conversion_days = pd.to_datetime(conversion["date_group"], unit="ms")
conversion["date_group"] = _conversion_days.dt.date


def plot_total_visits(start_date=None, end_date=None):
    """Draw a bar chart of total visits per date and save it as a PNG.

    Reads the module-level ``conversion`` frame, optionally restricts it to
    the inclusive ``[start_date, end_date]`` range, and sums visits per date.
    The chart is written to ``./charts/total_visits.png`` and then shown.

    Args:
        start_date: Earliest date to include (anything ``pd.to_datetime``
            accepts). Falsy values disable the lower bound.
        end_date: Latest date to include. Falsy values disable the upper bound.
    """
    filtered_data = conversion
    if start_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] >= pd.to_datetime(start_date).date()
        ]
    if end_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] <= pd.to_datetime(end_date).date()
        ]

    # The frame may hold several rows per date (it carries a platform column);
    # sum them so each date gets one bar rather than overlapping bars and labels.
    totals = filtered_data.groupby("date_group", as_index=False)["visits"].sum()

    fig = plt.figure(figsize=(14, 7))
    bars = plt.bar(
        totals["date_group"],
        totals["visits"],
        color="lightblue",
        edgecolor="blue",
    )
    plt.title("Total Visits", fontsize=16)
    plt.xlabel("Date Group", fontsize=14)
    plt.ylabel("Visits", fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(axis="y")

    # Print the value above every bar.
    for bar in bars:
        yval = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            int(yval),
            ha="center",
            va="bottom",
            fontsize=10,
        )

    plt.tight_layout()
    plt.savefig("./charts/total_visits.png")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def plot_total_visits_by_platform(start_date=None, end_date=None):
    """Render a grouped bar chart of visits per date, split by platform.

    Uses the module-level ``conversion`` frame, optionally limited to the
    inclusive ``[start_date, end_date]`` range. The figure is saved to
    ``./charts/total_visits_by_platform.png`` and then shown.
    """
    data = conversion

    # Each bound is applied only when the caller passed a truthy value.
    if start_date:
        first_day = pd.to_datetime(start_date).date()
        data = data[data["date_group"] >= first_day]
    if end_date:
        last_day = pd.to_datetime(end_date).date()
        data = data[data["date_group"] <= last_day]

    plt.figure(figsize=(14, 7))
    sns.barplot(data=data, x="date_group", y="visits", hue="platform")

    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Total Visits", fontsize=14)
    plt.title("Total Visits by Platform", fontsize=16)
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.savefig("./charts/total_visits_by_platform.png")
    plt.show()


def plot_total_registrations(start_date=None, end_date=None):
    """Draw a bar chart of total registrations per date and save it as a PNG.

    Reads the module-level ``conversion`` frame, optionally restricts it to
    the inclusive ``[start_date, end_date]`` range, and sums registrations per
    date. The chart is written to ``./charts/total_registrations.png`` and
    then shown.

    Args:
        start_date: Earliest date to include (anything ``pd.to_datetime``
            accepts). Falsy values disable the lower bound.
        end_date: Latest date to include. Falsy values disable the upper bound.
    """
    filtered_data = conversion
    if start_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] >= pd.to_datetime(start_date).date()
        ]
    if end_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] <= pd.to_datetime(end_date).date()
        ]

    # The frame may hold several rows per date (it carries a platform column);
    # sum them so each date gets one bar rather than overlapping bars and labels.
    totals = filtered_data.groupby("date_group", as_index=False)[
        "registrations"
    ].sum()

    fig = plt.figure(figsize=(14, 7))
    bars = plt.bar(
        totals["date_group"],
        totals["registrations"],
        color="lightblue",
        edgecolor="blue",
    )
    plt.title("Total Weekly Registrations", fontsize=16)
    plt.xlabel("Date Group", fontsize=14)
    plt.ylabel("Registrations", fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(axis="y")

    # Print the value above every bar.
    for bar in bars:
        yval = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            int(yval),
            ha="center",
            va="bottom",
            fontsize=10,
        )

    plt.tight_layout()
    plt.savefig("./charts/total_registrations.png")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def plot_registrations_by_platform(start_date=None, end_date=None):
    """Render a grouped bar chart of registrations per date, split by platform.

    Uses the module-level ``conversion`` frame, optionally limited to the
    inclusive ``[start_date, end_date]`` range. The figure is saved to
    ``./charts/registrations_by_platform.png`` and then shown.
    """
    data = conversion

    # Each bound is applied only when the caller passed a truthy value.
    if start_date:
        first_day = pd.to_datetime(start_date).date()
        data = data[data["date_group"] >= first_day]
    if end_date:
        last_day = pd.to_datetime(end_date).date()
        data = data[data["date_group"] <= last_day]

    plt.figure(figsize=(14, 7))
    sns.barplot(data=data, x="date_group", y="registrations", hue="platform")

    plt.xlabel("Date Group", fontsize=14)
    plt.ylabel("Total Registrations", fontsize=14)
    plt.title("Weekly Registrations by Platform", fontsize=16)
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.savefig("./charts/registrations_by_platform.png")
    plt.show()


def plot_overall_conversion(start_date=None, end_date=None):
    """Draw a line chart of overall daily conversion and save it as a PNG.

    Reads the module-level ``conversion`` frame, optionally restricts it to
    the inclusive ``[start_date, end_date]`` range, sums visits and
    registrations per date and plots ``100 * registrations / visits``.
    The chart is written to ``./charts/overall_conversion.png`` and then shown.

    Args:
        start_date: Earliest date to include (anything ``pd.to_datetime``
            accepts). Falsy values disable the lower bound.
        end_date: Latest date to include. Falsy values disable the upper bound.
    """
    filtered_data = conversion
    if start_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] >= pd.to_datetime(start_date).date()
        ]
    if end_date:
        filtered_data = filtered_data[
            filtered_data["date_group"] <= pd.to_datetime(end_date).date()
        ]

    # The frame may hold several rows per date (it carries a platform column).
    # Plotting them as-is zigzags between rows of the same date, so compute
    # one overall rate per date from the summed counts.
    daily = filtered_data.groupby("date_group", as_index=False)[
        ["visits", "registrations"]
    ].sum()
    # Dates with 0 visits get 0 % rather than inf/NaN.
    overall = (100 * daily["registrations"] / daily["visits"]).where(
        daily["visits"] > 0, 0.0
    )

    fig = plt.figure(figsize=(14, 7))
    plt.plot(
        daily["date_group"],
        overall,
        marker="o",
        linestyle="-",
        color="b",
        label="Overall Conversion",
    )

    # Label every point; two decimals keep the labels readable.
    for day, value in zip(daily["date_group"], overall):
        plt.annotate(
            f"{value:.2f}%",
            (day, value),
            textcoords="offset points",
            xytext=(0, 10),
            ha="center",
        )

    plt.title("Overall Conversion", fontsize=16)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Conversion (%)", fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    plt.savefig("./charts/overall_conversion.png")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


# Notebook cell output: the '.env' file was successfully created with the contents:
# API_URL=https://data-charts-api.hexlet.app
# DATE_BEGIN=2023-03-01
# DATE_END=2023-09-01

