In [None]:
from pyspark.sql import SparkSession, DataFrame
import pyspark.sql.functions as F
import pyspark.sql.types as t


def extract_data(spark: SparkSession, path: str = "worldcities.csv") -> DataFrame:
    """Read the cities CSV file into a DataFrame.

    Args:
        spark: Active Spark session used to read the file.
        path: Location of the CSV file. Defaults to ``worldcities.csv``,
            the path that was previously hard-coded.

    Returns:
        A DataFrame whose column names come from the file's header row.
        No schema or ``inferSchema`` option is set, so Spark loads every
        column as a string.
    """
    return spark.read.option("header", "true").csv(path)


def transform_data(df: DataFrame, country: str = 'Russia') -> DataFrame:
    """Compute city-count and population statistics for a single country.

    Args:
        df: Input DataFrame; must contain ``country`` and ``population``
            columns.
        country: Value of the ``country`` column to keep.

    Returns:
        A DataFrame grouped by ``country`` with the columns
        ``count_cities``, ``avg_population`` (rounded to a whole number),
        ``min_population`` and ``max_population``. If no row matches
        ``country`` the result has no rows.
    """
    # Cast explicitly so min/max compare numerically even if ``population``
    # arrives as a string column (e.g. from a CSV read without a schema).
    # DoubleType is used instead of FloatType because a 32-bit float cannot
    # represent every integer above 2**24 (~16.7 million), and large city
    # populations exceed that.
    population = F.col("population").cast(t.DoubleType())

    return (
        df
        .filter(F.col("country") == country)
        .groupBy("country")
        .agg(
            # count() on a column skips nulls, so rows without a population
            # value are not included in count_cities.
            F.count("population").alias("count_cities"),
            F.round(F.avg(population)).alias("avg_population"),
            F.min(population).alias("min_population"),
            F.max(population).alias("max_population"),
        )
    )


def save_data(df: DataFrame, pat: str = "output.csv") -> None:
    """Write ``df`` in CSV format with a header row.

    Args:
        df: DataFrame to persist.
        pat: Destination path handed to the Spark writer. Anything already
            at this path is overwritten.
    """
    csv_writer = df.write.format("csv").mode("overwrite")
    csv_writer.save(pat, header='true')
    

def main() -> None:
    """Run the pipeline: read the cities file, aggregate, and save the result."""
    spark = SparkSession.builder.appName("cities").getOrCreate()
    try:
        df = extract_data(spark)
        output = transform_data(df)
        save_data(output)
    finally:
        # Always stop the session so it is released even if a step fails.
        spark.stop()

# Run the pipeline only when executed directly (script or notebook cell),
# not as a side effect of importing this module.
if __name__ == "__main__":
    main()