In [7]:
import os

from google.auth.exceptions import RefreshError
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


In [8]:
# OAuth scopes requested when loading the cached token or running the consent flow.
# NOTE(review): the code visible here only reads from the sheet; a read-only
# scope may be sufficient -- confirm before changing.
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
]

# Identifier of the spreadsheet that is queried through the Sheets API.
SPREADSHEET_ID = "1UZ6ZI3MWDeSO_w1joJPHb8_mgzRuL4OSAgpRmkjFjyo"

# Value passed as the `range` argument of the values().get() request.
RANGE_NAME = "Dataset"

In [11]:
# Obtain OAuth credentials: reuse the token cached in token.json when it is
# still valid, otherwise refresh it or run the interactive consent flow, and
# write the resulting token back to token.json for the next run.
credentials = None
if os.path.exists("token.json"):
    credentials = Credentials.from_authorized_user_file("token.json", SCOPES)
if not credentials or not credentials.valid:
    if credentials and credentials.expired and credentials.refresh_token:
        try:
            credentials.refresh(Request())
        except RefreshError as refresh_error:
            # A revoked or expired refresh token cannot be refreshed; instead
            # of crashing (and forcing a manual delete of token.json), discard
            # the cached credentials and fall through to the consent flow.
            print(f"Cached token could not be refreshed ({refresh_error}); re-authorizing.")
            credentials = None
    if not credentials or not credentials.valid:
        # No usable cached credentials: run the installed-app flow using the
        # client secrets stored in credits.json.
        flow = InstalledAppFlow.from_client_secrets_file("credits.json", SCOPES)
        credentials = flow.run_local_server(port=0)
    # Cache the (new or refreshed) token so later runs can skip the flow.
    with open("token.json", "w") as token:
        token.write(credentials.to_json())

# Zero-based positions of the cells used for each FASTA record. Per the
# original assumption: column B = species, C = gene, F = protein id,
# Q = sequence.
SPECIES_COL, GENE_COL, PROTEIN_ID_COL, SEQUENCE_COL = 1, 2, 5, 16


def _clean_cell(row, index, default=""):
    """Return the cell at `index` of `row` with whitespace runs collapsed.

    Falls back to `default` when the row is too short to contain the cell or
    when the cell is blank. Collapsing whitespace keeps embedded newlines in a
    cell from breaking the one-line FASTA header.
    """
    if index < len(row):
        text = " ".join(str(row[index]).split())
        if text:
            return text
    return default


try:
    # Build the Sheets service and fetch the configured range.
    service = build("sheets", "v4", credentials=credentials)
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=SPREADSHEET_ID, range=RANGE_NAME).execute()
    values = result.get('values', [])

    if not values:
        print('No data found.')
    else:
        # Ensure the Output directory exists
        os.makedirs("Output", exist_ok=True)
        skipped_rows = 0
        # Explicit encoding so the file does not depend on the platform default.
        with open("Output/Embeding_Sequences.fasta", "w", encoding="utf-8") as fasta_file:
            for row in values[1:]:  # The first row is treated as a header and skipped
                # Rows can be shorter than the sheet is wide, and cells in the
                # middle of a row can be blank; both fall back to "Unknown".
                species = _clean_cell(row, SPECIES_COL, "Unknown")
                gene = _clean_cell(row, GENE_COL, "Unknown")
                Protein_id = _clean_cell(row, PROTEIN_ID_COL, "Unknown")
                # Drop every whitespace character from the sequence so it is
                # written as a single residue line.
                sequence = _clean_cell(row, SEQUENCE_COL).replace(" ", "")
                if not sequence:
                    # Writing a placeholder such as "Unknown" here would be
                    # read as residues by FASTA consumers, so skip the record.
                    skipped_rows += 1
                    continue
                # Write to FASTA format
                fasta_file.write(f">{species}|{gene}|{Protein_id}\n{sequence}\n")
        if skipped_rows:
            print(f"Skipped {skipped_rows} row(s) without a sequence.")

except HttpError as error:
    # Report API failures without raising, as the original cell did.
    print(error)