In [2]:
# Configurar a API com Flask
from flask import Flask, request, jsonify
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, to_date, col

In [3]:
app = Flask(__name__)

In [4]:
# Inicializar a SparkSession
spark = SparkSession.builder \
    .appName("Global Temperature Analysis by date by city API") \
    .getOrCreate()

24/09/03 21:30:16 WARN Utils: Your hostname, MacBook-Air-de-Wesley.local resolves to a loopback address: 127.0.0.1; using 192.168.0.161 instead (on interface en0)
24/09/03 21:30:16 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/09/03 21:30:17 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [5]:
# Carregar o dataset limpo
df = spark.read.csv('cleanedUnico_bytemperature_bydatabycity.csv/part-00000-059553c9-5662-487f-ae72-f4bab6705764-c000.csv', header=True, inferSchema=True)


                                                                                

In [6]:
# Certificar-se de que a coluna 'dt' está no formato de data
df = df.withColumn("dt", to_date(col("dt")))

In [7]:
@app.route('/get_countrys_temperature', methods=['GET'])
def get_temperature_by_country():
    country = request.args.get('country')
    df_filtered = df.filter(df['Country'] == country)
    avg_temp = df_filtered.agg(avg("AverageTemperature").alias("AverageTemperature")).collect()

    if avg_temp:
        temperature = avg_temp[0]['AverageTemperature']
        return jsonify({'country': country, 'average_temperature': temperature})
    else:
        return jsonify({'error': 'Country not found'}), 404

In [8]:
@app.route('/get_temperature_by_date_country', methods=['GET'])
def get_temperature_by_date_country():
    country = request.args.get('country')
    date = request.args.get('date')
    df_filtered = df.filter((df['Country'] == country) & (df['dt'] == date))
    avg_temp = df_filtered.agg(avg("AverageTemperature").alias("AverageTemperature")).collect()

    if avg_temp:
        temperature = avg_temp[0]['AverageTemperature']
        return jsonify({'country': country, 'date': date, 'average_temperature': temperature})
    else:
        return jsonify({'error': 'Temperature data not found for the given country and date'}), 404

In [9]:
@app.route('/get_temperature_by_city_date', methods=['GET'])
def get_temperature_by_city_date():
    city = request.args.get('city')
    date = request.args.get('date')
    df_filtered = df.filter((df['City'] == city) & (df['dt'] == date))
    avg_temp = df_filtered.agg(avg("AverageTemperature").alias("AverageTemperature")).collect()

    if avg_temp:
        temperature = avg_temp[0]['AverageTemperature']
        return jsonify({'city': city, 'date': date, 'average_temperature': temperature})
    else:
        return jsonify({'error': 'Temperature data not found for the given city and date'}), 404

In [10]:
@app.route('/get_temperature_by_country_city', methods=['GET'])
def get_temperature_by_country_city():
    country = request.args.get('country')
    city = request.args.get('city')
    df_filtered = df.filter((df['Country'] == country) & (df['City'] == city))
    avg_temp = df_filtered.agg(avg("AverageTemperature").alias("AverageTemperature")).collect()

    if avg_temp:
        temperature = avg_temp[0]['AverageTemperature']
        return jsonify({'country': country, 'city': city, 'average_temperature': temperature})
    else:
        return jsonify({'error': 'Temperature data not found for the given country and city'}), 404

In [11]:
@app.route('/get_temperature_by_country_date_range', methods=['GET'])
def get_temperature_by_country_date_range():
    country = request.args.get('country')
    start_date = request.args.get('start_date')
    end_date = request.args.get('end_date')
    
    df_filtered = df.filter(
        (df['Country'] == country) &
        (df['dt'] >= start_date) &
        (df['dt'] <= end_date)
    )
    
    df_filtered = df_filtered.select('dt', 'AverageTemperature').orderBy('dt')
    results = df_filtered.collect()
    
    if results:
        temperatures = [{'date': row['dt'].strftime('%Y-%m-%d'), 'temperature': row['AverageTemperature']} for row in results]
        return jsonify({'country': country, 'temperature_data': temperatures})
    else:
        return jsonify({'error': 'Temperature data not found for the given country and date range'}), 404


In [13]:
if __name__ == "__main__":
    app.run(port=5003)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5003
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [03/Sep/2024 21:57:37] "GET /get_countrys_temperature?country=Russia&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 21:57:53] "GET /get_countrys_temperature?country=Brasil&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 21:58:00] "GET /get_countrys_temperature?country=Brazil&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 22:10:36] "GET /get_countrys_temperature?country=Brazil&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 22:10:51] "GET /get_countrys_temperature?country=Brazil&date=1970-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 22:11:13] "GET /get_temperature_by_city_date?city=Moscow&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 22:12:28] "GET /get_temperature_by_city_date?city=Salvador&date=1990-01-01 HTTP/1.1" 200 -
127.0.0.1 - - [03/Sep/2024 22:12:28] "GET /get_temperature_by_city_date?city=Salvador&date=1990-01-01 HTTP/1.1" 200 -
127.0.0