# Webscraping the Teddy Bear Club NFT Data

#### This notebook details the process of scraping the Round 1 and 2 NFTs from the official Teddy Bears Club website ([Teddy Bears Club](https://tbc.teddyswap.org/)) and the [CNFT Tools](https://cnft.tools/teddybearclub2) website. It also allows you to recreate the JSON files in case they have been deleted. If that is the case, run this file first, followed by `attributecounter.ipynb` and then `rankdata.ipynb`.
---
### 1.1 Open Statements

In [1]:
open System
open System.Collections.Generic
open System.Net.Http
open System.Text
open System.Text.Json
open System.Threading.Tasks
open System.IO

### 1.2 `saveToFile`: saves the given JSON into the given file path
It is called by the scraping functions to serialize and save the scraped data.

In [2]:
/// Serializes `data` to indented JSON and writes it to `filePath`.
///
/// The parent directory of `filePath` is created first when it does not exist.
/// A path without a directory component (e.g. "out.json") is written as-is,
/// without attempting to create a directory.
///
/// Any exception raised while creating the directory, serializing or writing
/// is caught and reported on stdout; nothing is propagated to the caller.
let saveToFile (data: obj) (filePath: string) =
    try
        let dataFolder = Path.GetDirectoryName(filePath)

        // The directory name is null/empty when the path has no directory part.
        // Directory.CreateDirectory rejects such values, so only create a folder
        // that is actually named and does not exist yet.
        if not (String.IsNullOrWhiteSpace(dataFolder)) && not (Directory.Exists(dataFolder)) then
            Directory.CreateDirectory(dataFolder) |> ignore

        // Indented output with the relaxed encoder, so the saved file stays human-readable.
        let options = JsonSerializerOptions(WriteIndented = true, Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping)

        let jsonOutput = JsonSerializer.Serialize(data, options)
        File.WriteAllText(filePath, jsonOutput)
        printfn "Data successfully saved to %s" filePath
    with
    | ex -> printfn "Failed to save data: %s" ex.Message


## 1.3 Scraping
### Teddy Bear Club Round 1
The function `directJSONScraper` is used to scrape Round 1 NFTs from the official Teddy Bears Club website ([Teddy Bears Club](https://tbc.teddyswap.org/)).

In [3]:
/// Holds the Round 1 elements collected by the most recent `directJSONScraper` run.
let nftRound1Data = List<JsonElement>()

/// Downloads a JSON array from `url`, stores its elements in `nftRound1Data`
/// and writes the collected list to `outputFilePath` through `saveToFile`.
///
/// The list is cleared before the download starts. Any exception raised while
/// fetching or parsing is printed to stdout rather than propagated.
/// The returned Task is already started when the function returns.
let directJSONScraper (url: string, outputFilePath: string) : Task =
    let fetchAndStore =
        async {
            nftRound1Data.Clear()
            use http = new HttpClient()
            http.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")

            try
                printfn "Fetching data from %s..." url
                let! body = http.GetStringAsync(url) |> Async.AwaitTask

                // A JSON `null` document deserializes to a null array: nothing to store or save.
                match JsonSerializer.Deserialize<JsonElement[]>(body) with
                | null -> ()
                | items ->
                    nftRound1Data.AddRange(items)
                    saveToFile nftRound1Data outputFilePath
            with
            | failure -> printfn "Error fetching data: %s" failure.Message
        }

    Async.StartAsTask(fetchAndStore) :> Task

### Teddy Bear Club Round 2

The `paginatedJSONScraper` function is designed to scrape Round 2 NFTs from the [CNFT Tools](https://cnft.tools/teddybearclub2) website. Unlike the `directJSONScraper` function, it handles pagination and requires a payload to retrieve the data efficiently. 

In [4]:
/// Holds the Round 2 elements collected by the most recent `paginatedJSONScraper` run.
let nftRound2Data = List<JsonElement>()

/// Scrapes a paginated JSON endpoint page by page and saves the combined result.
///
/// For every page from 1 to `totalPages` a JSON payload is POSTed to `url`.
/// The response is parsed as a JSON object and the array stored under `key`
/// is appended to `nftRound2Data`. A page that fails (non-success status code,
/// missing key, unexpected format or an exception) is reported on stdout and
/// skipped; scraping then continues with the next page.
///
/// After the last page the collected list is written to `outputFilePath`
/// through `saveToFile`. The list is cleared before scraping starts.
/// The returned Task is already started when the function returns.
let paginatedJSONScraper (key: string, url: string, outputFilePath: string) : Task =
        async {
            nftRound2Data.Clear()
            use client = new HttpClient()
            client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")
            
            // Request body sent for every page; only `page` changes between requests.
            let payloadTemplate = 
                {|
                    project = "none"
                    sort = "asc"
                    method = "rarity"
                    page = 1
                    priceOnly = "all"
                    filters = {| |}
                    sliders = 
                        {| 
                            minPrice = 0
                            maxPrice = 0
                            minRank = 0
                            maxRank = 0 
                        |}
                    instantSale = false
                    walletCheck = false
                    stakes = []
                    pageSize = 50
                |}

            let totalPages = 41
            // Pause between consecutive requests.
            let delayInMilliseconds = 1000

            for page in 1 .. totalPages do
                printfn "Fetching page %d..." page
                let payload = 
                    {| payloadTemplate with 
                        page = page
                    |}

                let jsonPayload = JsonSerializer.Serialize(payload)

                try
                    // Both the request content and the response are disposable;
                    // release them once this page has been handled.
                    use content = new StringContent(jsonPayload, Encoding.UTF8, "application/json")
                    use! response = client.PostAsync(url, content) |> Async.AwaitTask

                    if response.IsSuccessStatusCode then
                        let! responseString = response.Content.ReadAsStringAsync() |> Async.AwaitTask

                        let jsonResponse = JsonSerializer.Deserialize<Dictionary<string, obj>>(responseString)

                        match jsonResponse.TryGetValue(key) with
                        | true, results -> 
                            let resultsJson = results :?> JsonElement
                            if resultsJson.ValueKind = JsonValueKind.Array then
                                for element in resultsJson.EnumerateArray() do
                                    nftRound2Data.Add(element)
                            else
                                printfn $"Unexpected data format for {key}"
                        | _ ->
                            // Report the missing key instead of silently dropping the page.
                            printfn "Key '%s' not found in response for page %d" key page
                    else
                        printfn "Failed to fetch page %d. Status code: %A" page response.StatusCode
                with
                    | ex -> 
                        printfn "Error on page %d: %s" page ex.Message

                // The delay runs after failed pages as well, then the loop moves on.
                do! Async.Sleep(delayInMilliseconds)

            printfn "Data scraping complete"

            try
                saveToFile nftRound2Data outputFilePath
            with
                | ex -> printfn "Failed to write to file: %s" ex.Message

        } |> Async.StartAsTask :> Task    

## 1.4 Call the Function for Each of the NFT Rounds

In [None]:
let url = "https://tbc.teddyswap.org/rankedAssets.json"
let outputFilePath = "../data/collection_1.json"
// directJSONScraper returns an already-started Task. Block until it has
// finished so later cells read a fully populated nftRound1Data instead of
// racing the background work that is still filling the list.
directJSONScraper(url, outputFilePath).Wait()

In [None]:
let url = "https://cnft.tools/toolsapi/v3/project/teddybearclub2"
let outputFilePath = "../data/collection_2.json"
// paginatedJSONScraper returns an already-started Task. Block until every
// page has been fetched and saved so later cells read a complete
// nftRound2Data instead of racing the background work.
paginatedJSONScraper("stats", url, outputFilePath).Wait()

## 1.5 Output
Lastly, we print out a few of the gathered records for validation. This may take a moment as the function is still in the process of scraping and saving the data.

In [9]:
// Seq.truncate yields at most 10 items; unlike Seq.take it does not throw
// when the list holds fewer than 10 elements (e.g. the scrape failed or has
// not finished yet).
let firstTenR1 = nftRound1Data |> Seq.truncate 10 |> Seq.toList
printfn "%A" firstTenR1

[{
        "assetName": "TeddyBearsClub89",
        "assetID": "89",
        "name": "Teddy Bears Club #89",
        "price": "x",
        "cnftID": "x",
        "iconurl": "QmWBffPMZCGYrEEULxmVcoZjYdNkwqcTwJFYfsY3zN1a21",
        "url": "QmWBffPMZCGYrEEULxmVcoZjYdNkwqcTwJFYfsY3zN1a21",
        "Background": "Blue Sky",
        "Bear": "Gold Bear",
        "Clothes": "Red Devil Suit",
        "Face": "Fake Mustache ",
        "Handheld": "None",
        "Head": "Black Red Hat",
        "Skins": "None",
        "Trait Count": "5",
        "encodedName": "54656464794265617273436c75623839",
        "buildType": "robot",
        "rarityScore": "336",
        "rarityRank": "1",
        "prices": {},
        "listingDate": "x",
        "ownerStakeKey": "stake1u9sa02tk80rmjtfaat5u3vydm3kj7xacurwf2y7c34p0eng87r0c6"
    };
 {
        "assetName": "TeddyBearsClub7180",
        "assetID": "7180",
        "name": "Teddy Bears Club #7180",
        "price": "x",
        "cnftID": "x",
        "iconu

In [10]:
// Seq.truncate yields at most 10 items; unlike Seq.take it does not throw
// when the list holds fewer than 10 elements (e.g. the scrape failed or has
// not finished yet).
let firstTenR2 = nftRound2Data |> Seq.truncate 10 |> Seq.toList
printfn "%A" firstTenR2

[{
            "assetName": "TeddyBearsClub869",
            "assetID": "869",
            "name": "Teddy Bears Club #869",
            "price": "x",
            "cnftID": "x",
            "iconurl": "QmYQJ2ZbyNCJYcd8xoWP7oMsRK74RpZcPHRo825nNMZHmW",
            "url": "QmYQJ2ZbyNCJYcd8xoWP7oMsRK74RpZcPHRo825nNMZHmW",
            "Background": "Room",
            "Bear": "Pink Bear",
            "Clothes": "Full Space Suit",
            "Face": "wink 2",
            "Handheld": "Clown Lollipop",
            "Head": "None",
            "Skins": "None",
            "Trait Count": "5",
            "encodedName": "54656464794265617273436c7562383639",
            "buildType": "robot",
            "rarityScore": "841",
            "rarityRank": "1",
            "prices": {},
            "listingDate": "x",
            "ownerStakeKey": "stake1uy67jvetyl2usuhlnlg85t4vfcq07ung5fzp0gd5fs4m4mcke6277"
        };
 {
            "assetName": "TeddyBearsClub909",
            "assetID": "909",
        