Source: https://platform.stratascratch.com/data-projects/data-transformation

### Imports

In [154]:
import pandas as pd
import json

### Initial data intake

In [155]:
level1 = pd.read_json("./datasets/case.json")
# Convert string to list or dict object
level1["Payload"] = level1["Payload"].apply(json.loads)

# Convert to UTC-3
level1["EnqueuedTimeUtc"] = (
    pd.to_datetime(level1["EnqueuedTimeUtc"])
    .dt.tz_convert("Etc/GMT+3")
    .dt.strftime("%d/%m/%Y %H:%M:%S")
)

level1.head(5)

Unnamed: 0,EnqueuedTimeUtc,EventName,Payload
0,05/09/2021 05:04:08,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."
1,18/08/2021 08:43:23,DynamicPrice_Result,"{'provider': 'ApplyDynamicPricePerOption', 'of..."
2,05/09/2021 06:04:04,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."
3,25/08/2021 02:02:55,CurateOffer_Result,"[{'curationProvider': 'ByPrice', 'offerId': '1..."
4,05/09/2021 05:03:28,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."


In [156]:
level2_CurateOffer = level1.query("EventName =='CurateOffer_Result'")
level2_DynamicPrice_Result = level1.query("EventName == 'DynamicPrice_Result'")

### Create df_CurateOffer

In [157]:
# Pack 1st unnested dataframe back to JSON
js_CurateOffer = json.loads(level2_CurateOffer.to_json(orient="records"))

df_CurateOffer = pd.json_normalize(
    js_CurateOffer,
    record_path=["Payload", "options"],
    meta=[
        ["Payload", "curationProvider"],
        ["Payload", "offerId"],
        ["Payload", "dealerId"],
        "EnqueuedTimeUtc",
    ],
)

df_CurateOffer.head(5)

Unnamed: 0,uniqueOptionId,optionId,isMobileDealer,isOpen,eta,chamaScore,productBrand,isWinner,minimumPrice,maximumPrice,dynamicPrice,finalPrice,defeatPrimaryReason,defeatReasons,Payload.curationProvider,Payload.offerId,Payload.dealerId,EnqueuedTimeUtc
0,b0e296a9-0590-f0e0-8211-243a2ededb12,6517 || dd839e4c-9f84-45eb-9cb2-9069fecf70f2,True,True,1:00,8.0,ULTRAGAZ,True,90.0,180.0,91.9,91.9,,,ByPrice,149f0e53-ff85-425f-a01a-8710f06704ea,6517,25/08/2021 02:02:55
1,d6562c24-0b37-5fb4-8275-65b7b8b47b87,6517 || 6517,False,False,0:01,8.0,ULTRAGAZ,False,90.0,180.0,91.9,91.9,Closed,"[Closed, HasDriverInOffer]",ByPrice,149f0e53-ff85-425f-a01a-8710f06704ea,6517,25/08/2021 02:02:55
2,8d0f9262-f543-d0c8-a869-33985ae3ecda,9047 || 9047 || ULTRAGAZ,False,False,1:00,9.0,ULTRAGAZ,False,99.0,198.0,99.95,99.95,Closed,"[Closed, HigherPrice, HasDriverInOffer]",ByPrice,149f0e53-ff85-425f-a01a-8710f06704ea,9047,25/08/2021 02:02:55
3,3cd346f4-d297-7568-2e50-d43a8e2fd0a9,9047 || 9047 || CONSIGAZ,False,False,1:00,9.0,CONSIGAZ,False,89.99,179.98,91.89,91.89,Closed,"[Closed, HigherPrice, HigherETA]",ByPrice,149f0e53-ff85-425f-a01a-8710f06704ea,9047,25/08/2021 02:02:55
4,577e4bbd-f49d-ac23-56a6-e70072a05229,9047 || 9047 || LIQUIGAS,False,False,1:00,9.0,LIQUIGAS,False,92.0,184.0,93.9,93.9,Closed,"[Closed, HigherPrice, HigherETA]",ByPrice,149f0e53-ff85-425f-a01a-8710f06704ea,9047,25/08/2021 02:02:55


### Process DynamicPrice

In [158]:
level2_DynamicPrice_Result.head(5)

Unnamed: 0,EnqueuedTimeUtc,EventName,Payload
0,05/09/2021 05:04:08,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."
1,18/08/2021 08:43:23,DynamicPrice_Result,"{'provider': 'ApplyDynamicPricePerOption', 'of..."
2,05/09/2021 06:04:04,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."
4,05/09/2021 05:03:28,DynamicPrice_Result,"{'provider': 'ApplyDynamicPriceRange', 'offerI..."
5,18/08/2021 08:44:00,DynamicPrice_Result,"{'provider': 'ApplyDynamicPricePerOption', 'of..."


In [159]:
js_DynamicPrice = json.loads(level2_DynamicPrice_Result.to_json(orient="records"))
js_DynamicPrice[1]

{'EnqueuedTimeUtc': '18/08/2021 08:43:23',
 'EventName': 'DynamicPrice_Result',
 'Payload': {'provider': 'ApplyDynamicPricePerOption',
  'offerId': '56e0702c-0218-4626-8d3d-ae9d54b4503b',
  'algorithmOutput': [{'uniqueOptionId': 'b0e296a9-0590-f0e0-8211-243a2ededb12',
    'bestPrice': 92.45},
   {'uniqueOptionId': 'd6562c24-0b37-5fb4-8275-65b7b8b47b87',
    'bestPrice': 92.45},
   {'uniqueOptionId': '8d0f9262-f543-d0c8-a869-33985ae3ecda',
    'bestPrice': 92.45},
   {'uniqueOptionId': '151e59ac-761a-96f5-d2b9-882037a9fd28',
    'bestPrice': 94.6},
   {'uniqueOptionId': '3cd346f4-d297-7568-2e50-d43a8e2fd0a9',
    'bestPrice': 94.6},
   {'uniqueOptionId': 'b7a7b6d1-4dae-7392-5aaf-f3369c29db1d',
    'bestPrice': 93.0},
   {'uniqueOptionId': '577e4bbd-f49d-ac23-56a6-e70072a05229',
    'bestPrice': 93.0},
   {'uniqueOptionId': 'f9b876ab-2590-952f-d69d-5b352ec251f3',
    'bestPrice': 91.35}]}}

In [171]:
df_DynamicPrice = pd.json_normalize(js_DynamicPrice)
df_DynamicPriceOption = df_DynamicPrice[
    df_DynamicPrice["Payload.provider"] == "ApplyDynamicPricePerOption"
]
df_DynamicPriceRange = df_DynamicPrice[
    df_DynamicPrice["Payload.provider"] == "ApplyDynamicPriceRange"
]
df_DynamicPriceOption.head(5)

Unnamed: 0,EnqueuedTimeUtc,EventName,Payload.provider,Payload.offerId,Payload.algorithmOutput.min_global,Payload.algorithmOutput.min_recommended,Payload.algorithmOutput.max_recommended,Payload.algorithmOutput.differenceMinRecommendMinTheory,Payload.algorithmOutput
1,18/08/2021 08:43:23,DynamicPrice_Result,ApplyDynamicPricePerOption,56e0702c-0218-4626-8d3d-ae9d54b4503b,,,,,[{'uniqueOptionId': 'b0e296a9-0590-f0e0-8211-2...
4,18/08/2021 08:44:00,DynamicPrice_Result,ApplyDynamicPricePerOption,00991873-194e-4a6e-89c9-8f68668b6aaa,,,,,[{'uniqueOptionId': 'b0e296a9-0590-f0e0-8211-2...
7,18/08/2021 08:44:00,DynamicPrice_Result,ApplyDynamicPricePerOption,00991873-194e-4a6e-89c9-8f68668b6aaa,,,,,[{'uniqueOptionId': 'b0e296a9-0590-f0e0-8211-2...
11,18/08/2021 08:44:04,DynamicPrice_Result,ApplyDynamicPricePerOption,7a471a13-7922-4e7a-9667-9b2f96b44a2a,,,,,[{'uniqueOptionId': 'b0e296a9-0590-f0e0-8211-2...
16,18/08/2021 08:43:57,DynamicPrice_Result,ApplyDynamicPricePerOption,7cb20a39-c7f9-4c4c-9d51-94dbb8c4ff07,,,,,[{'uniqueOptionId': '9c4ec643-934f-56d1-198d-4...


#### Create df_DynamicPriceOption

In [170]:
js_DynamicPriceOption = json.loads(
    df_DynamicPriceOption[
        [
            "EnqueuedTimeUtc",
            "Payload.provider",
            "Payload.offerId",
            "Payload.algorithmOutput",
        ]
    ].to_json(orient="records")
)
df_DynamicPriceOption2 = (
    pd.json_normalize(
        js_DynamicPriceOption,
        record_path="Payload.algorithmOutput",
        meta=["EnqueuedTimeUtc", "Payload.provider", "Payload.offerId"],
    )
    .assign(
        Provider=lambda x: '"' + x["Payload.provider"] + '"',
        uniqueOptionId=lambda x: '"' + x["uniqueOptionId"] + '"',
        OfferId=lambda x: '"' + x["Payload.offerId"] + '"',
    )
    .rename({"bestPrice": "BestPrice", "EnqueuedTimeUtc": "EnqueuedTimeSP"}, axis=1)
)
df_DynamicPriceOption2 = df_DynamicPriceOption2[
    ["Provider", "OfferId", "uniqueOptionId", "BestPrice", "EnqueuedTimeSP"]
]
df_DynamicPriceOption2.head(5)

Unnamed: 0,Provider,OfferId,uniqueOptionId,BestPrice,EnqueuedTimeSP
0,"""ApplyDynamicPricePerOption""","""56e0702c-0218-4626-8d3d-ae9d54b4503b""","""b0e296a9-0590-f0e0-8211-243a2ededb12""",92.45,18/08/2021 08:43:23
1,"""ApplyDynamicPricePerOption""","""56e0702c-0218-4626-8d3d-ae9d54b4503b""","""d6562c24-0b37-5fb4-8275-65b7b8b47b87""",92.45,18/08/2021 08:43:23
2,"""ApplyDynamicPricePerOption""","""56e0702c-0218-4626-8d3d-ae9d54b4503b""","""8d0f9262-f543-d0c8-a869-33985ae3ecda""",92.45,18/08/2021 08:43:23
3,"""ApplyDynamicPricePerOption""","""56e0702c-0218-4626-8d3d-ae9d54b4503b""","""151e59ac-761a-96f5-d2b9-882037a9fd28""",94.6,18/08/2021 08:43:23
4,"""ApplyDynamicPricePerOption""","""56e0702c-0218-4626-8d3d-ae9d54b4503b""","""3cd346f4-d297-7568-2e50-d43a8e2fd0a9""",94.6,18/08/2021 08:43:23


#### Create df_DynamicPriceRange

In [162]:
df_DynamicPriceRange.drop("Payload.algorithmOutput", axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_DynamicPriceRange.drop('Payload.algorithmOutput', axis = 1, inplace=True)


In [163]:
df_CurateOffer.to_csv("./outputs/CuratedOfferOptions.csv")
df_DynamicPriceOption.to_csv("./outputs/DynamicPriceOption.csv")
df_DynamicPriceRange.to_csv("./outputs/DynamicPriceRange.csv")