# Demo API data load

This code imports several Python libraries and also sets the maximum number of columns to display in a Pandas DataFrame to None, which allows all columns to be displayed without truncation.

In [None]:
import os
import json
import requests
import pandas as pd
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

## 1. Authorization with token

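In this section, the code opens a JSON file (`token.json`) and loads its contents into a Python dictionary. The OAuth token stored in that dictionary is then placed in the `Authorization` header used for the API requests.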


In [None]:
def access_data(file_path):
    """Load a JSON file and return its parsed content.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The object decoded from the file by ``json.load`` (a dict when the
        file holds a JSON object).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, encoding='utf-8') as file:
        return json.load(file)

# Read the credentials file and list the keys it provides.
creds = access_data('token.json')
print(creds.keys())

In [None]:
# HTTP headers sent with every API request: the OAuth token from the
# credentials dictionary goes into the Authorization header.
header = dict(Authorization=f'OAuth {creds["oauth_token"]}')

## 2. Download loop

This section of the code sets up the necessary parameters and credentials for accessing the AppMetrica API and then iterates through a range of dates to download data into CSV files.

In [None]:
# Application id used in the export URL ('****' is presumably a masked
# placeholder -- replace it with the real id before running).
app_id = '****'
# Name of the exported resource; also used as the output sub-directory name.
api_point = 'events'
# Columns requested from the export, in the order they are sent to the API.
fields = [
    # event_* columns
    'event_datetime', 'event_json', 'event_name',
    'event_receive_datetime', 'event_receive_timestamp', 'event_timestamp',
    # session / installation / device identifiers
    'session_id', 'installation_id', 'appmetrica_device_id',
    # location and connection
    'city', 'connection_type', 'country_iso_code',
    # device_* columns
    'device_ipv6', 'device_locale', 'device_manufacturer',
    'device_model', 'device_type',
    # advertising identifiers
    'google_aid', 'ios_ifa', 'ios_ifv',
    # mobile network
    'mcc', 'mnc', 'operator_name',
    # model / operating system
    'original_device_model', 'os_name', 'os_version',
    # profile and remaining identifiers
    'profile_id', 'windows_aid',
    # application columns
    'app_build_number', 'app_package_name', 'app_version_name',
    'application_id',
]

In [None]:
from datetime import date, timedelta
import time

def daterange(start_date, end_date):
    """Yield consecutive days from start_date up to, but excluding, end_date.

    Nothing is yielded when end_date is not later than start_date.
    """
    one_day = timedelta(days=1)
    remaining = (end_date - start_date).days
    current = start_date
    while remaining > 0:
        yield current
        current = current + one_day
        remaining -= 1

# Bounds of the export window passed to daterange().
start_date = date(year=2024, month=5, day=26)
end_date = date(year=2024, month=6, day=2)

In [None]:
# Directory that receives one CSV file per exported day.
save_path = f'data/{api_point}'
os.makedirs(save_path, exist_ok=True)
DELAY = 102  # seconds to wait before asking the API again
# (connect, read) timeouts in seconds so a stalled connection cannot hang
# the loop forever.
REQUEST_TIMEOUT = (10, 300)

# Export the data day by day: each request covers exactly one date.
for day in daterange(start_date, end_date):
    sday = day.strftime('%Y-%m-%d')
    print('start loading day', sday, end=' ')
    url = ''.join([
        'https://api.appmetrica.yandex.ru/logs/v1/export/events.csv?',
        f'application_id={app_id}',
        f'&date_since={sday}',
        f'&date_until={sday}',
        f'&fields={",".join(fields)}',
    ])
    # Poll until the export is delivered with HTTP 200.
    # NOTE(review): the API presumably answers with a non-200 status
    # (e.g. 202) while the export is still being prepared -- confirm against
    # the AppMetrica Logs API documentation.
    while True:
        r = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
        status_code = r.status_code
        if status_code == 200:
            break
        if 400 <= status_code < 500 and status_code != 429:
            # A client error (bad request, bad token, no access) will not
            # resolve itself by waiting; stop instead of looping forever.
            print()
            print('request failed:', r.text[:500])
            r.raise_for_status()
        print('status code', status_code, 'waiting...', end=' ')
        time.sleep(DELAY)
    file_name = f'{save_path}/data_{sday}.csv'
    # Store the response body byte-for-byte. This avoids guessing the text
    # encoding (re-encoding r.text as latin-1 fails whenever requests detects
    # a different charset) and avoids newline translation in text mode.
    with open(file_name, 'wb') as file:
        file.write(r.content)
    print('data saved to file', file_name)

## 3. Test loaded data

In this section, the code locates the directory where the downloaded CSV files are stored and reads the last file in the directory listing (intended to be the most recent one) into a Pandas DataFrame. It then prints the shape of the DataFrame (number of rows and columns), displays the first few rows, and prints summary information about the DataFrame, including data types and non-null counts (which reveal missing values).

In [None]:
api_point = 'events'
save_path = f'data/{api_point}'
# os.listdir() returns entries in arbitrary order and may contain non-CSV
# entries. Keep only the CSV files and sort them: the files are named
# data_YYYY-MM-DD.csv, so alphabetical order is chronological and the last
# element is the most recent day.
all_files = sorted(
    name for name in os.listdir(save_path) if name.endswith('.csv')
)

In [None]:
# Load the last file of the listing, then show its dimensions and a preview
# of the first rows.
df = pd.read_csv(filepath_or_buffer=f'{save_path}/{all_files[-1]}')
print(df.shape)
display(df.head(n=5))

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
df.info()