In [9]:
import email
from email.policy import default
from __future__ import print_function
import base64
import os
import os.path
import json

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

%config InlineBackend.figure_format = 'retina'

In [11]:
def setup():
    """
    Configuration to authenticate with API
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.|
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            # creds = flow.run_local_server(port=0)
            creds = flow.run_console()
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
            
    return creds

In [11]:
def get_labels(service, user_id='me'):
    """
    Returns list of email labels (an empty list when the user has none)

    service: Resource instance for interacting with API
    user_id: string identifying the gmail target user
    """
    response = service.users().labels().list(userId=user_id).execute()
    labels = response.get('labels', [])
    if not labels:
        print('No labels found.')
        # Always hand back a list so callers can iterate without a None check.
        return []
    return labels

In [13]:
def list_messages(service, user_id='me', max_results=100):
    """
    Requests one listing of messages and returns the API response as-is

    service: Resource instance for interacting with API
    user_id: string identifying the gmail target user
    max_results: value forwarded as the request's maxResults argument

    The request is always made with includeSpamTrash=True.
    """
    messages_api = service.users().messages()
    request = messages_api.list(
        userId=user_id,
        includeSpamTrash=True,
        maxResults=max_results,
    )
    return request.execute()


In [15]:
def get_message(service,  message_id, user_id='me', format='full'):
    """
    Gets a particular message/email's data by id

    service: Resource instance for interacting with API
    message_id: Unique message identifier.
    user_id: String identifying the gmail target user
    format: Specifies scope of contents in the returned message; forwarded
        to the API request (defaults to 'full')
    """
    # Forward the caller's `format` instead of always requesting 'full'.
    request = service.users().messages().get(
        userId=user_id, id=message_id, format=format)
    return request.execute()

In [17]:
def decode_message(data):
    """
    Decodes a base64 (URL-safe alphabet) data message

    data: encoded data, as str or bytes; trailing '=' padding may be missing

    Returns the decoded payload interpreted as UTF-8 text.
    Raises UnicodeDecodeError if the decoded bytes are not valid UTF-8.
    """
    if isinstance(data, str):
        data = data.encode('ascii')
    else:
        data = bytes(data)
    # base64url values are frequently transmitted without '=' padding, which
    # urlsafe_b64decode rejects; restore it up to a multiple of four.
    padded = data + b'=' * (-len(data) % 4)
    decoded_bytes = base64.urlsafe_b64decode(padded)
    return decoded_bytes.decode('utf-8')

In [19]:
# OAuth scopes requested for the Gmail API (read-only access).
# If modifying these scopes, delete the file token.json.
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
]


def _iter_payload_parts(payload):
    """Yield `payload` followed by every nested part beneath it, depth-first."""
    yield payload
    for part in payload.get('parts', []):
        yield from _iter_payload_parts(part)


def _extract_plain_text(payload):
    """Return the decoded text of the last inline text/plain part, or None."""
    decoded = None
    for part in _iter_payload_parts(payload):
        if part.get('mimeType') != 'text/plain':
            continue
        # Parts without inline data (e.g. attachments) have nothing to decode.
        data = part.get('body', {}).get('data')
        if data:
            decoded = decode_message(data)
    return decoded


def main():
    """Export plain-text Gmail messages into a SPAM / NO_SPAM dataset.

    Authenticates through setup(), requests a listing of up to 100 messages,
    and for every message labelled INBOX or SPAM that carries a text/plain
    body writes {"data": <text>} to dataset/<folder>/sample_<NN>.json, where
    <folder> is NO_SPAM for INBOX and SPAM for SPAM.

    Errors raised by the API as HttpError are printed rather than propagated.
    """
    creds = setup()

    try:
        # Call the Gmail API
        service = build('gmail', 'v1', credentials=creds)

        # Labels come back as a list of dictionaries; the 'id' and 'name'
        # keys are the interesting ones.
        labels = get_labels(service, 'me')

        # The listing is a dictionary; its 'messages' value is a list of
        # dictionaries whose 'id' holds the message identifier.
        messages = list_messages(service, 'me', 100)

        map_label_to_folder = {
            'INBOX': 'NO_SPAM',
            'SPAM': 'SPAM'
        }

        # create necessary folders to save data
        for folder in map_label_to_folder.values():
            os.makedirs(f'dataset/{folder}', exist_ok=True)

        # 'messages' is absent from the response when nothing matched.
        for message_ref in messages.get('messages', []):
            # Fetch the actual contents of the message by id.
            # Resource layout:
            # https://developers.google.com/gmail/api/reference/rest/v1/users.messages
            message = get_message(service, message_ref['id'], 'me', 'full')

            # Only INBOX (no spam) and SPAM messages are exported; INBOX wins
            # when both labels are present.
            label_ids = message.get('labelIds', [])
            if 'INBOX' in label_ids:
                expected_label = 'INBOX'
            elif 'SPAM' in label_ids:
                expected_label = 'SPAM'
            else:
                continue

            # for some users some emails might not contain text/plain data
            decoded_message = _extract_plain_text(message.get('payload', {}))
            if not decoded_message:
                continue

            folder = map_label_to_folder[expected_label]
            # Name the file after the current file count plus one, padded to
            # at least two digits so that '1' becomes '01'.
            number_of_files = len(os.listdir(f'dataset/{folder}')) + 1
            file_number = str(number_of_files).zfill(2)

            with open(f'dataset/{folder}/sample_{file_number}.json', 'w') as json_data:
                json.dump({'data': decoded_message}, json_data)

    except HttpError as error:
        # TODO(developer) - Handle errors from gmail API.
        print(f'An error occurred: {error}')

In [21]:
# Run the export defined above; the first run needs credentials.json in the
# working directory so the OAuth login can create token.json.
main()

FileNotFoundError: [Errno 2] No such file or directory: 'credentials.json'

In [23]:
from __future__ import print_function
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

# If you change the access scopes, delete the token.json file.
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
]

def main():
    """Obtain Gmail API credentials and report that a token is available.

    Reuses token.json when it holds valid credentials, refreshes them when
    they are expired and a refresh token exists, and otherwise runs the
    local-server login flow from credentials.json. Credentials obtained by a
    refresh or a login are written back to token.json.

    Returns the credentials object. The access token itself is deliberately
    not printed, because notebook output is saved and shared with the file.
    """
    creds = None
    # token.json stores the user's token between runs.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # Without valid credentials, refresh them or start the login process.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for subsequent runs.
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    # Report success without leaking the secret into the cell output.
    token_value = creds.token or ''
    print(f"Access token obtained ({len(token_value)} characters; value hidden).")
    return creds

if __name__ == '__main__':
    main()


FileNotFoundError: [Errno 2] No such file or directory: 'credentials.json'

In [None]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Enter the email address and move on to the password step
def input_email(driver, email):
    """Type `email` into the sign-in form and press the "next" button.

    Waits up to 10 seconds for the element named "identifier" to be present,
    clears it, types the address, then clicks the element with id
    "identifierNext". Any exception is printed and re-raised.
    """
    try:
        wait = WebDriverWait(driver, 10)
        identifier_locator = (By.NAME, "identifier")
        field = wait.until(EC.presence_of_element_located(identifier_locator))
        field.clear()
        field.send_keys(email)
        driver.find_element(By.ID, "identifierNext").click()
    except Exception as err:
        print(f"Ошибка при вводе email: {err}")
        raise

# Enter the password and attempt to sign in
def input_password(driver, password):
    """Type `password` into the sign-in form and press the "next" button.

    Waits up to 10 seconds for the password input named "Passwd" to be
    present, clears it, types the password, then clicks the element with id
    "passwordNext". Any exception is printed and re-raised.
    """
    try:
        wait = WebDriverWait(driver, 10)
        passwd_locator = (By.CSS_SELECTOR, "input[type='password'][name='Passwd']")
        field = wait.until(EC.presence_of_element_located(passwd_locator))
        field.clear()
        field.send_keys(password)
        driver.find_element(By.ID, "passwordNext").click()
    except Exception as err:
        print(f"Ошибка при вводе пароля: {err}")
        raise

def main():
    """Attempt a Google account sign-in through headless Chrome.

    The email and password are read from the GMAIL_EMAIL and GMAIL_PASSWORD
    environment variables, falling back to an interactive prompt, so that no
    secret lives in the notebook. The chromedriver location is read from
    CHROMEDRIVER_PATH; when it is unset, Selenium's default driver lookup is
    used. Failures during the sign-in are printed, and the browser is always
    closed.
    """
    import getpass
    import os

    # NOTE(review): a real password was previously hard-coded here and saved
    # with the notebook; it should be treated as compromised and rotated.
    email = os.environ.get("GMAIL_EMAIL") or input("Email: ")
    password = os.environ.get("GMAIL_PASSWORD") or getpass.getpass("Password: ")

    # Chrome settings
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Remove to see the browser window
    # Use a fixed desktop user-agent string
    special_user_agent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/90.0.4430.93 Safari/537.36")
    chrome_options.add_argument(f"user-agent={special_user_agent}")

    # Avoid a machine-specific absolute path: take the driver location from
    # the environment, otherwise let Selenium locate the driver itself.
    driver_path = os.environ.get("CHROMEDRIVER_PATH")
    service = Service(executable_path=driver_path) if driver_path else Service()
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        driver.get("https://accounts.google.com/signin")
        time.sleep(2)  # Short pause while the page loads

        # Enter the email and move on to the password step
        input_email(driver, email)
        time.sleep(10)  # Wait for the transition to the password page

        # Enter the password
        input_password(driver, password)

        # Wait for the sign-in to finish; a WebDriverWait on a post-login
        # element could replace this fixed pause.
        time.sleep(5)
        print("Попытка входа выполнена (проверьте состояние аккаунта).")
    except Exception as e:
        print("Ошибка при выполнении входа:", e)
    finally:
        driver.quit()

if __name__ == '__main__':
    main()
