In [None]:
import asyncio, json, logging, os, sys

# Load ENVs: copy every entry under "Values" in local.settings.json into the
# process environment. The context manager closes the file handle right away
# instead of leaving it open until garbage collection.
with open("local.settings.json") as settings_file:
    os.environ.update(json.load(settings_file)["Values"])

# Configure logging: INFO for this notebook, WARNING for the "azure" loggers
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logging.getLogger("azure").setLevel(logging.WARNING)

# Define the maximum number of concurrent tasks
MAX_CONCURRENT_TASKS = 128

# Set ProactorEventLoop for Windows to bypass select() limitations
# NOTE(review): inside a Jupyter kernel an event loop is normally already
# running, so set_event_loop() may not change the loop that executes the
# cells - confirm this branch still has the intended effect.
if sys.platform == "win32":
    loop = asyncio.ProactorEventLoop()
    asyncio.set_event_loop(loop)
    # Lower limit on Windows (presumably tied to the platform's handle
    # limits - TODO confirm the origin of this value).
    MAX_CONCURRENT_TASKS = 60

# Semaphore for limiting concurrency; shared by every coroutine defined below
SEMAPHORE = asyncio.Semaphore(MAX_CONCURRENT_TASKS)

In [None]:
from typing import Any, Dict
import aiohttp, asyncio, datetime, msal

# How many times a failed request is retried after the first attempt
MAX_RETRIES: int = 3

# Upper bound, in seconds, for the back-off delay between two attempts
MAX_WAIT_TIME: int = 15


class MSGraphClient:
    """
    Small asynchronous helper for calling the Microsoft Graph REST API.

    The client authenticates as an MSAL confidential client application,
    caches the access token until its expiry time, and issues GET requests
    with retry handling.
    """

    # Prefix used to recognise absolute Graph URLs (for example the
    # "@odata.nextLink" value of a paged response) passed as ``path``.
    BASE_URL = "https://graph.microsoft.com/"

    def __init__(self, version: str) -> None:
        """
        Initialize the MSGraphClient with the specified API version.

        Reads MSGRAPH_CLIENT_ID, MSGRAPH_TENANT_ID and MSGRAPH_CLIENT_SECRET
        from the environment to build the MSAL application.

        Args:
            version (str): The version of the Microsoft Graph API to use.

        Raises:
            KeyError: If one of the required environment variables is missing.
        """
        self.version = version
        # Cached token and the local time at which it stops being valid
        self.access_token = None
        self.expires_at = None
        self.app = msal.ConfidentialClientApplication(
            os.environ["MSGRAPH_CLIENT_ID"],
            authority="https://login.microsoftonline.com/{}".format(
                os.environ["MSGRAPH_TENANT_ID"]
            ),
            client_credential=os.environ["MSGRAPH_CLIENT_SECRET"],
        )

    async def get_access_token(self) -> str:
        """
        Acquire an access token for the Microsoft Graph API.

        The cached token is returned while it has not expired; otherwise a
        new one is requested through MSAL and cached together with its
        expiry time.

        Returns:
            str: The access token.

        Raises:
            Exception: If the token cannot be acquired.
        """
        # A token without a recorded expiry is treated as expired rather than
        # compared against None.
        cached_is_valid = (
            self.access_token
            and self.expires_at is not None
            and self.expires_at > datetime.datetime.now()
        )
        if cached_is_valid:
            return self.access_token

        scopes = ["https://graph.microsoft.com/.default"]
        token_response = self.app.acquire_token_silent(scopes, account=None)
        if not token_response:
            token_response = self.app.acquire_token_for_client(scopes)

        if "access_token" not in token_response:
            raise Exception(
                "Could not acquire token: {}".format(
                    token_response.get("error_description", "Unknown error")
                )
            )

        self.access_token = token_response["access_token"]
        expires_in = token_response.get("expires_in", 0)
        self.expires_at = datetime.datetime.now() + datetime.timedelta(
            seconds=expires_in
        )
        return self.access_token

    def _build_url(self, path: str) -> str:
        """Return ``path`` unchanged when it is already an absolute Graph URL,
        otherwise prefix it with the Graph host and the configured version."""
        if path.startswith(self.BASE_URL):
            return path
        return "https://graph.microsoft.com/{}/{}".format(self.version, path)

    @staticmethod
    def _parse_retry_after(value: Any, default: int) -> int:
        """Convert a Retry-After header value to a non-negative number of
        seconds, falling back to ``default`` when the header is missing or is
        not an integer (the header may also carry an HTTP date)."""
        if value is None:
            return default
        try:
            seconds = int(value)
        except (TypeError, ValueError):
            return default
        return max(seconds, 0)

    async def make_request(self, path: str, **params: Any) -> Dict[str, Any]:
        """
        Make an authenticated GET request to the Microsoft Graph API.

        Responses with status 429 or 503 are treated as throttling and honour
        the Retry-After header; any other non-200 status is retried with a
        doubling delay capped at MAX_WAIT_TIME. At most MAX_RETRIES + 1
        attempts are made.

        Args:
            path (str): The API endpoint path, or an absolute Graph URL.
            **params: Additional query parameters. A ``filter`` keyword is
                sent as ``$filter``.

        Returns:
            dict: The response JSON data.

        Raises:
            Exception: If the request fails after the maximum retries. When a
                non-throttling error response was seen, the exception carries
                the URL, status and body of the most recent one.
        """
        url = self._build_url(path)

        # Build the query once; the caller's keyword arguments stay untouched.
        query = dict(params)
        if "filter" in query:
            query["$filter"] = query.pop("filter")

        wait_time = 1
        error = None
        # One session serves every attempt of this request.
        async with aiohttp.ClientSession() as session:
            for attempt in range(MAX_RETRIES + 1):
                # Looked up per attempt so a token that expired while waiting
                # is renewed before the retry.
                headers = {
                    "Authorization": "Bearer " + await self.get_access_token(),
                    "Content-Type": "application/json",
                }
                logger.info("REQUESTING | {}".format(url))
                async with session.get(url, headers=headers, params=query) as response:
                    if response.status == 200:
                        return await response.json()

                    if response.status == 429 or response.status == 503:
                        logger.warning("THROTTLED | {}".format(url))
                        delay = self._parse_retry_after(
                            response.headers.get("Retry-After"), wait_time
                        )
                        wait_time = min(wait_time * 2, MAX_WAIT_TIME)
                    else:
                        message = "Error making request | {} | {} | {}".format(
                            url, response.status, await response.text()
                        )
                        logger.error(f"RETRYING | {message}")
                        error = Exception(message)
                        wait_time = min(wait_time * 2, MAX_WAIT_TIME)
                        delay = wait_time

                # Wait only when another attempt follows, and only after the
                # response has been released.
                if attempt < MAX_RETRIES:
                    await asyncio.sleep(delay)

        if error:
            raise error
        raise Exception("Maximum retries reached. Unable to make request.")

In [None]:
from azure.storage.blob.aio import ContainerClient
from azure.core.exceptions import ResourceExistsError
from typing import Any, Dict, List, AsyncGenerator
import asyncio, os


async def get_group_by_team_name(
    client: MSGraphClient, team_name: str
) -> Dict[str, Any]:
    """
    Get a group by its team name.

    Queries the "groups" endpoint for Teams-provisioned groups whose display
    name equals ``team_name`` and returns the first match.

    Args:
        client (MSGraphClient): The MSGraphClient instance.
        team_name (str): The name of the team.

    Returns:
        dict: The group information, or an empty dict when no group matches
        or the request fails (the failure is logged).
    """
    try:
        # A single quote inside an OData string literal must be doubled,
        # otherwise names such as "Big's Furniture" yield an invalid filter.
        escaped_name = str(team_name).replace("'", "''")
        response = await client.make_request(
            "groups",
            filter="resourceProvisioningOptions/Any(x:x eq 'Team') and displayName eq '{}'".format(
                escaped_name
            ),
        )
        return next(iter(response.get("value", [])), {})
    except Exception as e:
        logger.error(f"Failed to get group by team name: {e}")
        return {}


async def get_channels_for_group(
    client: MSGraphClient, group_id: str
) -> List[Dict[str, Any]]:
    """
    Fetch the channels that belong to a team.

    Args:
        client (MSGraphClient): Client used to call Microsoft Graph.
        group_id (str): Identifier of the group backing the team.

    Returns:
        list: The entries of the response's "value" field; an empty list when
        the field is absent or the request fails (the failure is logged).
    """
    channels: List[Dict[str, Any]] = []
    try:
        payload = await client.make_request(f"teams/{group_id}/channels")
        channels = payload.get("value", [])
    except Exception as e:
        # Best effort: report the problem and fall back to "no channels".
        logger.error(f"Failed to get channels for group {group_id}: {e}")
    return channels


async def get_drive_for_channel(
    client: MSGraphClient, group_id: str, channel_id: str
) -> Dict[str, Any]:
    """
    Look up the files folder of a channel.

    Args:
        client (MSGraphClient): Client used to call Microsoft Graph.
        group_id (str): Identifier of the group backing the team.
        channel_id (str): Identifier of the channel.

    Returns:
        dict: The response of the channel's "filesFolder" endpoint, or an
        empty dict when the request fails (the failure is logged).
    """
    try:
        files_folder = await client.make_request(
            f"teams/{group_id}/channels/{channel_id}/filesFolder"
        )
    except Exception as e:
        # Best effort: report the problem and signal "no drive" to the caller.
        logger.error(f"Failed to get drive for channel {channel_id}: {e}")
        return {}
    return files_folder


async def get_all_files_in_drive(
    client: MSGraphClient, drive_id: str, folder_id: str
) -> AsyncGenerator[Dict[str, Any], None]:
    """
    Get all files in a drive folder, recursively.

    Children are requested page by page. Items carrying a "file" entry are
    yielded; items carrying a "folder" entry are descended into before the
    remaining items of the current page are handled.

    Args:
        client (MSGraphClient): The MSGraphClient instance.
        drive_id (str): The drive ID.
        folder_id (str): The folder ID.

    Yields:
        dict: Information about each file.

    Raises:
        Exception: Whatever ``client.make_request`` raises; errors are not
            caught here.
    """
    select = "id,name,folder,file,size,@microsoft.graph.downloadUrl,parentReference"
    page = await client.make_request(
        "drives/{}/items/{}/children".format(drive_id, folder_id), select=select
    )

    while True:
        for item in page.get("value", []):
            if item.get("file"):
                yield item

            elif item.get("folder"):
                async for sub_item in get_all_files_in_drive(
                    client, drive_id, item["id"]
                ):
                    yield sub_item

        next_link = page.get("@odata.nextLink")
        if not next_link:
            break
        # The next-page URL is requested exactly as returned: it already
        # carries the query options of the first request, so re-sending
        # "select" would only duplicate a parameter.
        page = await client.make_request(next_link)


async def process_file(
    container_client: ContainerClient, team_name: str, file: Dict[str, Any]
) -> None:
    """
    Copy one file into Azure Blob Storage unless a matching blob is present.

    The blob is named "<team_name>/<folder path>/<file name>", where the
    folder path is the part of the file's parent path after ":/". A missing
    blob is created with a copy from the file's download URL. An existing
    blob whose size differs from the file's is deleted and copied again,
    unless its copy status is "pending". Errors are logged, not raised.

    Args:
        container_client (ContainerClient): The Azure Blob Storage container client.
        team_name (str): The name of the team.
        file (dict): The file information.
    """
    async with SEMAPHORE:
        try:
            blob = container_client.get_blob_client(
                "{}/{}/{}".format(
                    team_name,
                    file["parentReference"]["path"].split(":/")[-1],
                    file["name"],
                )
            )

            # Nothing stored yet: start the copy and stop here.
            if not await blob.exists():
                logger.info(f"STARTING {blob.blob_name}")
                await blob.start_copy_from_url(file["@microsoft.graph.downloadUrl"])
                return

            blob_props = await blob.get_blob_properties()

            # Same size as the source file: the blob is left alone.
            if file.get("size") == blob_props.get("size"):
                return

            # A copy is still in flight, so the size mismatch is expected.
            if blob_props.copy.status == "pending":
                logger.info(f"TRANSFERING {blob.blob_name}")
                return

            # Size mismatch without a running copy: replace the blob.
            logger.warning(f"RESTARTING {blob.blob_name}")
            await blob.delete_blob()
            await blob.start_copy_from_url(file["@microsoft.graph.downloadUrl"])
        except ResourceExistsError:
            logger.warning(f"Blob {blob.blob_name} already exists.")
        except Exception as e:
            logger.error(f"Error processing file {file['name']}: {e}")


async def export_conversation_history(
    client: MSGraphClient,
    container_client: ContainerClient,
    group: dict,
    channel: dict,
    team_name: str,
) -> None:
    """
    Export the entire conversation history for a given channel and save it to a blob in JSON format.

    The blob is named "<team_name>/<channel displayName>_conversation_history.json".
    An existing blob is never overwritten, so nothing is downloaded when it
    is already present. Errors are logged, not raised.

    Args:
        client (MSGraphClient): The MSGraphClient instance.
        container_client (ContainerClient): The Azure Blob Storage container client.
        group (dict): The group information; its "id" is used.
        channel (dict): The channel information; its "id" and "displayName" are used.
        team_name (str): The name of the team.
    """
    try:
        blob = container_client.get_blob_client(
            "{}/{}_conversation_history.json".format(team_name, channel["displayName"])
        )
        # Checked before paging: the messages would be thrown away anyway
        # when the export already exists.
        if await blob.exists():
            return

        # NOTE(review): only the channel "messages" endpoint is paged here;
        # replies are not requested separately - confirm that is intended.
        messages = []
        url = "teams/{}/channels/{}/messages".format(group["id"], channel["id"])
        while url:
            response = await client.make_request(url)
            messages.extend(response.get("value", []))
            url = response.get("@odata.nextLink", None)

        await blob.upload_blob(json.dumps(messages))
        logger.info(
            "Exported conversation history for channel {} in team {}.".format(
                channel["displayName"], team_name
            )
        )
    except Exception as e:
        # .get() keeps the handler itself from failing when the channel dict
        # has no "displayName".
        logger.error(
            "Failed to export conversation history for channel {} in team {}: {}".format(
                channel.get("displayName"), team_name, e
            )
        )


async def process_channel(
    client: MSGraphClient,
    container_client: ContainerClient,
    group: Dict[str, Any],
    team_name: str,
    channel: Dict[str, Any],
) -> None:
    """
    Handle a single channel: copy its files, then export its conversation history.

    Both steps are skipped when no files folder could be obtained for the
    channel. Files are processed one after another in the order the drive
    listing yields them.

    Args:
        client (MSGraphClient): Client used to call Microsoft Graph.
        container_client (ContainerClient): Destination blob container client.
        group (dict): Group information; "id" and "displayName" are used.
        team_name (str): Team name passed on to the blob naming.
        channel (dict): Channel information; "id" and "displayName" are used.
    """
    # NOTE(review): process_file acquires this same semaphore again while it
    # is held here - confirm the permits cannot run out for large teams.
    async with SEMAPHORE:
        logger.info(
            "STARTING | Team: {} | Channel: {}".format(
                group["displayName"], channel["displayName"]
            )
        )

        files_folder = await get_drive_for_channel(client, group["id"], channel["id"])
        if files_folder:
            drive_id = files_folder["parentReference"]["driveId"]
            root_folder_id = files_folder["id"]
            async for drive_file in get_all_files_in_drive(
                client, drive_id, root_folder_id
            ):
                await process_file(container_client, team_name, drive_file)

            await export_conversation_history(
                client, container_client, group, channel, team_name
            )

        logger.info(
            "FINISHED | Team: {} | Channel: {}".format(
                group["displayName"], channel["displayName"]
            )
        )


async def process_team(
    client: MSGraphClient, container_client: ContainerClient, team_name: str
) -> None:
    """
    Handle one team: resolve its group, then process all of its channels concurrently.

    Nothing happens when no group is found for ``team_name``.

    Args:
        client (MSGraphClient): Client used to call Microsoft Graph.
        container_client (ContainerClient): Destination blob container client.
        team_name (str): Display name of the team to process.
    """
    # NOTE(review): every process_channel call acquires this same semaphore
    # while it is held here - confirm the permits cannot run out for teams
    # with many channels.
    async with SEMAPHORE:
        group = await get_group_by_team_name(client, team_name)
        if not group:
            return

        logger.info("STARTING | Team: {}".format(group["displayName"]))
        channels = await get_channels_for_group(client, group["id"])
        # One coroutine per channel, awaited together.
        await asyncio.gather(
            *(
                process_channel(client, container_client, group, team_name, channel)
                for channel in channels
            )
        )
        logger.info("FINISHED | Team: {}".format(group["displayName"]))

In [None]:
# Define the ContainerClient
container_client = ContainerClient.from_connection_string(
    os.environ["COLD_STORAGE"], "teams"
)

# Instantiate the MSGraphClient class
client = MSGraphClient("v1.0")
for team_name in [
    "Alive Wellness and Fitness",
    "Andersons Warehouse",
    "Appliance Barn by Sleep Center",
    "Big's Furniture",
    "Blue Bell Mattress - Gardner White",
    "C4 Repo Trailer Sales (Hopkins)",
    "Carolina Moving Solutions",
    "Carson Home Furnishings",
    "Carson Newman University",
    "Cheap Sleep",
    "City Furniture",
    "Coastal Urge",
    "Comfy Planets",
    "CrossFit Republic",
    "DD Mattress Co",
    "Divano Furniture",
    "Englander",
    "ESC Mattress Center",
    "Full House Furniture",
    "Furniture 4 Less Outlet",
    "Furniture World LV",
    "Germain Yachts",
    "Great American Home Store",
    "Happy's Home Centers",
    "HOM Furniture",
    "Home Suite Home",
    "Home Zone Furniture",
    "HTF Trailers (Hopkins)",
    "Jack and Jill Adult Superstore",
    "JB Windows",
    "Jenny's Stores",
    "Jonathan Stevens Mattress Co",
    "Joyology",
    "KidStrong",
    "KitchenSearch.com",
    "Kueblers Furniture",
    "Liquid Planet Grille",
    "M&D Trailer Sales (Hopkins)",
    "Mack Mattress",
    "Mattress Clearance Center of NWA",
    "Mattress Clinic",
    "Mattress Hub",
    "Mattress on Demand",
    "Mattress Sleep Centers",
    "Mattress Store (St. George)",
    "MattressLand",
    "Michael Alan",
    "MPG - Designs In Stone",
    "MPG - Merridian Home Furnishings",
    "MPG - World Class Services",
    "MPG Biscuit Belly",
    "MPG Buff City Soap",
    "MPG Carnes Trucking",
    "MPG Cashel Fly",
    "MPG Churchill Downs",
    "MPG Discovery Builders",
    "MPG Downey Trucking",
    "MPG Fasig Tipton",
    "MPG Franklin Brazing",
    "MPG KSculpt",
    "MPG Mark's Feed Store",
    "MPG Merck Animal Health Equine",
    "MPG PDQ Doors",
    "MPG PXG",
    "MPG Ritchie Industries",
    "MPG Sportsmens Alliance",
    "MPG Thorntons",
    "MPG Zoom Room Florida",
    "MyHome Center",
    "Old Westbury",
    "Pantibiotic",
    "Pure Glow",
    "REM Sleep Solutions",
    "Rent-Buy Texas",
    "Revenge Rum",
    "Rosso Furniture",
    "RTB ABP",
    "RTB Burlington Channel",
    "RTB CO Denver",
    "RTB FL Estero",
    "RTB GA Atlanta",
    "RTB TN Memphis",
    "Ryan Testing Team",
    "Saslow & Henebry's Jewelry Stores",
    "Scott & Eric Podcast Team",
    "Snooze - Pueblo West",
    "Sofas 2 Furnishings",
    "Sparks Furniture",
    "St. Margaret's School",
    "Straight Blast Gym",
    "SunMed",
    "SUNY Buffalo",
    "SUNY Fredonia",
    "The American Furniture",
    "The FAM",
    "The Joint - MA",
    "The Joint – Massachusetts",
    "The Joint Detroit",
    "The Joint Weymouth",
    "Tyndall Furniture",
    "Uneeda Shed (Hopkins)",
    "Walker Furniture (LV)",
    "Wright Entertainment Group",
    "38 Special Luxury RV Resort",
]:
    await process_team(client, container_client, team_name)