# Scraping NFT Data for the Teddy Bears Club


This notebook demonstrates the process of scraping NFT data for the **Teddy Bears Club** from CNFT Tools:  
- [Round 1](https://cnft.tools/teddybearclub)
- [Round 2](https://cnft.tools/teddybearclub2)

If the JSON files for Round 1 and Round 2 NFTs (`tbc_roundone.json` and `tbc_roundtwo.json`) are missing, run this notebook to recreate them. Afterward, proceed to [`count_trait_values.ipynb`](count_trait_values.ipynb) and then [`rank_nfts_by_rarity.ipynb`](rank_nfts_by_rarity.ipynb) to complete the data processing pipeline.  

---
### 1.1 Open Statements

In [9]:
open System
open System.Collections.Generic
open System.Net.Http
open System.Text
open System.Text.Json
open System.Threading
open System.Threading.Tasks
open System.IO

### 1.2 Workflow

1. **Saving Data (`saveToFile`)**:
   - Saves scraped data in a human-readable JSON format. Automatically creates directories if needed. An existing file is left untouched, so delete it first if you want to re-scrape.

2. **Waiting for File Creation (`waitForFile`)**:
   - Ensures files are ready before proceeding, with a specified timeout.

3. **Scraping Data (`teddyBearClubScraper`)**:
   - Fetches metadata from the API across multiple pages and saves it to JSON files.
   - Parameters:
     - **`key`**: Specifies the data to extract (e.g., `"stats"`).
     - **`maxPage`**: Total pages to scrape.
     - **`url`**: API endpoint.
     - **`outputFilePath`**: Destination file path.

4. **Steps in This Notebook**:
   - **Round 1**: Scrape and save metadata to `tbc_roundone.json`.  
   - **Round 2**: Scrape and save metadata to `tbc_roundtwo.json`.  
   - **Inspect Data**: Display the first 10 NFTs from each round.


In [10]:
/// Serializes `data` as indented JSON and writes it to `filePath`.
///
/// - `data`: the value to serialize.
/// - `filePath`: destination file; its parent directory is created when missing.
///
/// An existing file is never overwritten: the call only reports that it was
/// skipped. Any exception raised while saving is caught and reported on stdout
/// instead of being propagated to the caller.
let saveToFile (data: obj) (filePath: string) =
    try
        if File.Exists(filePath) then
            printfn "File %s already exists; skipping save." filePath
        else
            let dataFolder = Path.GetDirectoryName(filePath)
            // A bare file name has no directory part (null or ""), and passing that
            // to Directory.CreateDirectory throws, so only create a folder when one
            // is actually named and does not exist yet.
            if not (String.IsNullOrWhiteSpace(dataFolder)) && not (Directory.Exists(dataFolder)) then
                Directory.CreateDirectory(dataFolder) |> ignore

            // Relaxed escaping keeps non-ASCII characters and symbols readable in the output.
            let options = JsonSerializerOptions(WriteIndented = true, Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping)

            let jsonOutput = JsonSerializer.Serialize(data, options)
            File.WriteAllText(filePath, jsonOutput)
            printfn "Data successfully saved to %s" filePath
    with
    | ex -> printfn "Failed to save data: %s" ex.Message


In [11]:
/// Polls until `filePath` exists or `timeoutInSeconds` have elapsed.
///
/// Returns `true` as soon as the file is found and `false` once the timeout is
/// exceeded. Between checks the calling thread sleeps for 5 seconds, and every
/// outcome (ready, waiting, timed out) is reported on stdout.
let waitForFile filePath timeoutInSeconds =
    let startedAt = System.DateTime.Now
    // None while still polling; Some result once the wait is decided.
    let mutable outcome : bool option = None
    while Option.isNone outcome do
        if File.Exists(filePath) then
            printfn "File %s is ready." filePath
            outcome <- Some true
        else
            let elapsedSeconds = (System.DateTime.Now - startedAt).TotalSeconds
            if elapsedSeconds > float timeoutInSeconds then
                printfn "Timeout reached. File %s did not appear within %d seconds." filePath timeoutInSeconds
                outcome <- Some false
            else
                printfn "Waiting for %s to appear..." filePath
                System.Threading.Thread.Sleep(5000)
    outcome.Value

### 1.3 Scraping

The `teddyBearClubScraper` function is designed to scrape Round 1 NFTs from [CNFT Tools - Round 1](https://cnft.tools/teddybearclub) and Round 2 NFTs from the [CNFT Tools - Round 2](https://cnft.tools/teddybearclub2) website.

In [12]:
/// Shared buffer holding the elements collected by the most recent scrape.
/// It is cleared at the start of every `teddyBearClubScraper` run and is read
/// by the inspection cells further down.
let nftData = List<JsonElement>()

/// Scrapes pages 1..`maxPage` of a paginated endpoint and saves the collected
/// elements to `outputFilePath` via `saveToFile`.
///
/// - `key`: name of the response property whose array elements are collected.
/// - `maxPage`: number of pages to request.
/// - `url`: endpoint that receives one JSON POST per page.
/// - `outputFilePath`: destination passed to `saveToFile`.
///
/// The work is started immediately and the returned `Task` completes when it
/// has finished. Scraping stops at the first page that fails (non-success
/// status code or exception); in that case nothing is saved, because an
/// incomplete file would be mistaken for a finished scrape.
let teddyBearClubScraper (key: string, maxPage: int, url: string, outputFilePath: string) : Task =
    async {
        nftData.Clear()
        use client = new HttpClient()
        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")

        let payloadTemplate =
            {|
                project = "none"
                sort = "asc"
                method = "rarity"
                page = 1
                priceOnly = "all"
                filters = {| |}
                sliders =
                    {|
                        minPrice = 0
                        maxPrice = 0
                        minRank = 0
                        maxRank = 0
                    |}
                instantSale = false
                walletCheck = false
                stakes = []
                pageSize = 50
            |}

        // Pause between consecutive page requests.
        let delayInMilliseconds = 100

        // Appends the array stored under `key` in one response body to nftData.
        let collectResults (responseString: string) =
            let jsonResponse = JsonSerializer.Deserialize<Dictionary<string, obj>>(responseString)

            match jsonResponse.TryGetValue(key) with
            | true, results ->
                let resultsJson = results :?> JsonElement
                if resultsJson.ValueKind = JsonValueKind.Array then
                    for element in resultsJson.EnumerateArray() do
                        nftData.Add(element)
                else
                    printfn $"Unexpected data format for {key}"
            | _ -> printfn "Key %s not found in the response" key

        // Requests a single page; yields true when the request succeeded.
        let fetchPage (page: int) : Async<bool> =
            async {
                printfn "Fetching page %d..." page
                let payload =
                    {| payloadTemplate with
                        page = page
                    |}

                let jsonPayload = JsonSerializer.Serialize(payload)
                // Request content and response are disposed as soon as the page is processed.
                use content = new StringContent(jsonPayload, Encoding.UTF8, "application/json")

                try
                    use! response = client.PostAsync(url, content) |> Async.AwaitTask
                    if response.IsSuccessStatusCode then
                        let! responseString = response.Content.ReadAsStringAsync() |> Async.AwaitTask
                        collectResults responseString
                        return true
                    else
                        printfn "Failed to fetch page %d. Status code: %A" page response.StatusCode
                        return false
                with
                    | ex ->
                        printfn "Error on page %d: %s" page ex.Message
                        return false
            }

        // Walks the pages in order. `return` inside a `for` loop of an async block
        // does not leave the loop, so the early exit is expressed through recursion.
        let rec fetchFrom (page: int) : Async<bool> =
            async {
                if page > maxPage then
                    return true
                else
                    let! succeeded = fetchPage page
                    if succeeded then
                        do! Async.Sleep(delayInMilliseconds)
                        return! fetchFrom (page + 1)
                    else
                        return false
            }

        let! completed = fetchFrom 1

        if completed then
            printfn "Data scraping complete"

            try
                saveToFile nftData outputFilePath
            with
                | ex -> printfn "Failed to write to file: %s" ex.Message
        else
            printfn "Scraping aborted; %s was not written because the data set is incomplete." outputFilePath

    } |> Async.StartAsTask :> Task

### 1.4 Round 1

In [13]:
// Round 1: API endpoint and destination file for the scraped metadata.
let url, outputFilePath =
    "https://cnft.tools/toolsapi/v3/project/teddybearclub", "./data/tbc_roundone.json"

// Start the scrape over 17 pages, collecting the "stats" array of each response.
// The cell evaluates to the Task returned by the scraper.
("stats", 17, url, outputFilePath) |> teddyBearClubScraper

Unnamed: 0,Unnamed: 1
Id,3
Exception,<null>
Status,WaitingForActivation
IsCanceled,False
IsCompleted,False
IsCompletedSuccessfully,False
CreationOptions,
AsyncState,<null>
IsFaulted,False


In [None]:
// Wait (up to 200 s) for the Round 1 file; waitForFile reports the outcome itself,
// so the boolean result is discarded explicitly.
waitForFile "./data/tbc_roundone.json" 200 |> ignore

// Seq.truncate returns at most 10 items and, unlike Seq.take, does not throw
// when fewer than 10 NFTs were collected.
let firstTenRoundOne = nftData |> Seq.truncate 10 |> Seq.toList
printfn "%A" firstTenRoundOne

### 1.5 Round 2

In [None]:
// Round 2: API endpoint and destination file for the scraped metadata.
let url, outputFilePath =
    "https://cnft.tools/toolsapi/v3/project/teddybearclub2", "./data/tbc_roundtwo.json"

// Start the scrape over 41 pages, collecting the "stats" array of each response.
// The cell evaluates to the Task returned by the scraper.
("stats", 41, url, outputFilePath) |> teddyBearClubScraper

In [17]:
// Wait (up to 200 s) for the Round 2 file; waitForFile reports the outcome itself,
// so the boolean result is discarded explicitly.
waitForFile "./data/tbc_roundtwo.json" 200 |> ignore

// Seq.truncate returns at most 10 items and, unlike Seq.take, does not throw
// when fewer than 10 NFTs were collected.
let firstTenRoundTwo = nftData |> Seq.truncate 10 |> Seq.toList
printfn "%A" firstTenRoundTwo

File ./data/tbc_roundtwo.json is ready.
[{
            "assetName": "TeddyBearsClub89",
            "assetID": "89",
            "name": "Teddy Bears Club #89",
            "price": "x",
            "cnftID": "x",
            "iconurl": "QmWBffPMZCGYrEEULxmVcoZjYdNkwqcTwJFYfsY3zN1a21",
            "url": "QmWBffPMZCGYrEEULxmVcoZjYdNkwqcTwJFYfsY3zN1a21",
            "Background": "Blue Sky",
            "Bear": "Gold Bear",
            "Clothes": "Red Devil Suit",
            "Face": "Fake Mustache ",
            "Handheld": "None",
            "Head": "Black Red Hat",
            "Skins": "None",
            "Trait Count": "5",
            "encodedName": "54656464794265617273436c75623839",
            "buildType": "robot",
            "rarityScore": "336",
            "rarityRank": "1",
            "prices": {},
            "listingDate": "x",
            "ownerStakeKey": "stake1u9sa02tk80rmjtfaat5u3vydm3kj7xacurwf2y7c34p0eng87r0c6"
        };
 {
            "assetName": "TeddyBearsClu