In [24]:
import pandas as pd
import pandera as pa
from pandera.typing import Index, DataFrame, Series
from datetime import datetime
from pandera import dtypes
import logging as log

import config
# NOTE(review): bare attribute access — the value is discarded, so this line
# only matters if reading `config.log` itself has a side effect. Confirm the
# intent (e.g. whether logging setup is expected to happen on `import config`).
config.log

class InputSchema:
    """Container for the schemas used to validate tables before transformation."""

    class YellowTripData(pa.SchemaModel):
        """Column contract for the YellowTripData table.

        Every column is coerced to its annotated dtype during validation.
        The two timestamp columns must not contain nulls; every other column
        is allowed to contain missing values.
        """

        # Integer columns use pandas' nullable ``Int64`` extension dtype.
        # A plain numpy ``int`` cannot represent missing values, so coercing a
        # column that contains nulls to ``int`` raises and ``nullable=True``
        # could never take effect with the previous ``Series[int]`` annotation.
        VendorID: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)
        tpep_pickup_datetime: Series[pa.DateTime] = pa.Field(coerce=True)
        tpep_dropoff_datetime: Series[pa.DateTime] = pa.Field(coerce=True)
        passenger_count: Series[float] = pa.Field(coerce=True, nullable=True)
        trip_distance: Series[float] = pa.Field(coerce=True, nullable=True)
        RatecodeID: Series[float] = pa.Field(coerce=True, nullable=True)
        store_and_fwd_flag: Series[str] = pa.Field(coerce=True, nullable=True)
        PULocationID: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)
        DOLocationID: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)
        payment_type: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)
        fare_amount: Series[float] = pa.Field(coerce=True, nullable=True)
        extra: Series[float] = pa.Field(coerce=True, nullable=True)
        mta_tax: Series[float] = pa.Field(coerce=True, nullable=True)
        tip_amount: Series[float] = pa.Field(coerce=True, nullable=True)
        tolls_amount: Series[float] = pa.Field(coerce=True, nullable=True)
        improvement_surcharge: Series[float] = pa.Field(coerce=True, nullable=True)
        total_amount: Series[float] = pa.Field(coerce=True, nullable=True)
        congestion_surcharge: Series[float] = pa.Field(coerce=True, nullable=True)
        airport_fee: Series[float] = pa.Field(coerce=True, nullable=True)

class OutputSchema:
    """Container for the schemas used to validate tables after transformation."""

    class YellowTripData(InputSchema.YellowTripData):
        """Output contract; adds nothing to the inherited input schema."""
    
       
@pa.check_types
def transform_YellowTripData(
    df: DataFrame[InputSchema.YellowTripData],
) -> DataFrame[OutputSchema.YellowTripData]:
    """Pass a YellowTripData frame through pandera's type checks.

    The ``pa.check_types`` decorator validates the argument against the input
    annotation and the return value against the output annotation. The body
    itself only logs the frame's shape.

    Args:
        df: Frame to validate against ``InputSchema.YellowTripData``.

    Returns:
        ``df`` as received by the function body.
    """
    log.info(f"Table YellowTripData validation complete. DataFrame shape: {df.shape}")
    return df
        
        
        
        

In [None]:
import pandas as pd
import pandera as pa
from pandera.typing import Index, DataFrame, Series
from datetime import datetime
from pandera import dtypes


# NOTE(review): an earlier cell of this notebook also defines a class named
# `InputSchema`; running both cells in one kernel rebinds the name to this one.
class InputSchema:
    """Container for the schemas used to validate temperature tables before transformation."""

    class GlobalTempSchema(pa.SchemaModel):
        """Column contract for the global temperature table.

        ``dt`` is coerced to a datetime and must not contain nulls. Every
        measurement and uncertainty column is coerced to float and may contain
        missing values.
        """

        dt: Series[pa.DateTime] = pa.Field(coerce=True)
        LandAverageTemperature: Series[float] = pa.Field(coerce=True, nullable=True)
        LandAverageTemperatureUncertainty: Series[float] = pa.Field(
            coerce=True, nullable=True
        )
        LandMaxTemperature: Series[float] = pa.Field(coerce=True, nullable=True)
        LandMaxTemperatureUncertainty: Series[float] = pa.Field(
            coerce=True, nullable=True
        )
        # Previously only the uncertainty of the minimum temperature was
        # declared, so the measurement itself was neither validated nor
        # coerced (undeclared columns are ignored unless the schema is strict).
        # NOTE(review): assumes the source table carries this column like the
        # other measurement/uncertainty pairs — confirm against the dataset.
        LandMinTemperature: Series[float] = pa.Field(coerce=True, nullable=True)
        LandMinTemperatureUncertainty: Series[float] = pa.Field(
            coerce=True, nullable=True
        )
        LandAndOceanAverageTemperature: Series[float] = pa.Field(
            coerce=True, nullable=True
        )
        LandAndOceanAverageTemperatureUncertainty: Series[float] = pa.Field(
            coerce=True, nullable=True
        )

    class GlobalLandTemperature(pa.SchemaModel):
        """Column contract for the per-city land temperature table.

        ``dt`` is coerced to a datetime and must not contain nulls. The
        remaining columns are coerced to their annotated dtype and may contain
        missing values. Latitude and Longitude are validated as strings.
        """

        dt: Series[pa.DateTime] = pa.Field(coerce=True)
        AverageTemperature: Series[float] = pa.Field(coerce=True, nullable=True)
        AverageTemperatureUncertainty: Series[float] = pa.Field(
            coerce=True, nullable=True
        )
        City: Series[str] = pa.Field(coerce=True, nullable=True)
        Country: Series[str] = pa.Field(coerce=True, nullable=True)
        Latitude: Series[str] = pa.Field(coerce=True, nullable=True)
        Longitude: Series[str] = pa.Field(coerce=True, nullable=True)


class OutputSchema:
    """Container for the schemas used to validate temperature tables after transformation."""

    class GlobalTempSchema(InputSchema.GlobalTempSchema):
        """Output contract; adds nothing to the inherited input schema."""

    class GlobalLandTemperature(InputSchema.GlobalLandTemperature):
        """Output contract; adds nothing to the inherited input schema."""


@pa.check_types
def transform_globaltempschema(
    df: DataFrame[InputSchema.GlobalTempSchema],
) -> DataFrame[OutputSchema.GlobalTempSchema]:
    """Pass a global temperature frame through pandera's type checks.

    The ``pa.check_types`` decorator validates the argument against the input
    annotation and the return value against the output annotation. The body
    performs no transformation of its own.

    Args:
        df: Frame to validate against ``InputSchema.GlobalTempSchema``.

    Returns:
        ``df`` as received by the function body.
    """
    return df


@pa.check_types
def transform_globallandtemp(
    df: DataFrame[InputSchema.GlobalLandTemperature],
) -> DataFrame[OutputSchema.GlobalLandTemperature]:
    """Pass a per-city land temperature frame through pandera's type checks.

    The ``pa.check_types`` decorator validates the argument against the input
    annotation and the return value against the output annotation. The body
    performs no transformation of its own.

    Args:
        df: Frame to validate against ``InputSchema.GlobalLandTemperature``.

    Returns:
        ``df`` as received by the function body.
    """
    return df


In [1]:
import main

# Call the project-local `main.create_profit_table` for month '05' and keep
# the result as `df` for the cells below.
df = main.create_profit_table(month='05')

INFO:root:Table YellowTripData validation complete. DataFrame shape: (3588295, 19)
INFO:root:
Transform dtype: Complete
 Validate: Complete

INFO:root:DataFrame size: 448.29 MB
INFO:root:Data lenght: 3588295


In [3]:
# Write `df` to an Excel workbook at the relative path 'name.xlsx'.
# NOTE(review): an .xlsx sheet holds at most 1,048,576 rows, and the log output
# recorded for the cell that built `df` reports 3,588,295 rows — confirm `df`
# is small enough here, otherwise this export will raise.
df.to_excel('name.xlsx')

In [14]:
def f(drange = ""):
    """Print ``drange``, or the literal ``'date'`` when it equals the empty string.

    Args:
        drange: Value to echo. Only a value equal to ``""`` triggers the
            fallback; any other value (including ``None`` or ``0``) is
            printed as-is.
    """
    print('date' if drange == "" else drange)

In [19]:
# Call f with a non-empty value; it prints the value itself rather than 'date'.
drange = "2023"
f(drange=drange)

2023


In [16]:
import pandas as pd
import pandera as pa


# Two frames with the same values but different dtypes for the `id` column:
# `dff` keeps the dtype pandas infers for a list containing None, while
# `dtt` is an independent frame cast to the nullable Int64 extension dtype.
dff = pd.DataFrame({"id": [1, 2, 3, 4, None]})
dtt = dff.astype({"id": pd.Int64Dtype()})

In [13]:
# Render `dff` as the cell output (bare last expression).
dff

Unnamed: 0,id
0,1.0
1,2.0
2,3.0
3,4.0
4,


In [14]:
# Render `dtt` as the cell output (bare last expression).
dtt

Unnamed: 0,id
0,1.0
1,2.0
2,3.0
3,4.0
4,


In [10]:
# Inspect the column dtypes of `dff`.
# NOTE(review): the recorded output shows int64, but `dff` as defined in this
# notebook contains a None, which pandas stores as float64. The output looks
# stale (execution count 10 predates the defining cell's 16) — re-run to confirm.
dff.dtypes

id    int64
dtype: object

In [11]:
# Inspect the column dtypes of `dtt`, whose `id` column is cast to Int64 where
# the frame is built.
dtt.dtypes

id    Int64
dtype: object

In [9]:
# Compare the dtypes of both frames column by column; the result is a boolean
# Series with one entry per column.
dff.dtypes == dtt.dtypes

id    False
dtype: bool

In [24]:
# Scratch calculation: 2000 / 20 rounded to the nearest integer.
round(2000/20)


100