# In [24]:  (notebook cell marker)
import requests
import json
import pandas as pd
import jsonlines
from sklearn.linear_model import LinearRegression
from sklearn.impute import KNNImputer

def fill_missing_values(data: pd.DataFrame) -> pd.DataFrame:
    """Fill the gaps in a time-indexed frame without modifying the input.

    The gaps are filled in two stages:

    1. Time-weighted interpolation between the neighbouring known values;
       ``limit_direction='both'`` also fills gaps at the start and at the
       end of a column.
    2. If anything is still missing, KNN imputation (5 neighbours) over the
       columns that contain at least one known value.

    A column with no known values at all cannot be filled by either stage.
    It is left as NaN and a ``RuntimeWarning`` is emitted.

    Args:
        data: Frame to fill. ``method='time'`` requires a ``DatetimeIndex``.

    Returns:
        A new frame with the same index and columns, holding float values.

    Raises:
        ValueError: Propagated from pandas when the index is not a
            ``DatetimeIndex``, or when a value cannot be converted to float.
    """
    interpolated = data.interpolate(method='time', limit_direction='both')

    # KNNImputer always returns a float array, so the result is cast to float
    # on every path; the output dtype then does not depend on which stage
    # ended up filling the data.
    filled = interpolated.astype(float)
    missing = filled.isnull()
    if not missing.to_numpy().any():
        # Interpolation filled everything (this also covers an empty frame,
        # which KNNImputer would reject).
        return filled

    has_known_values = (~missing).any(axis=0).to_numpy()
    has_gaps = missing.any(axis=0).to_numpy()

    if (has_known_values & has_gaps).any():
        # KNNImputer drops columns that are entirely NaN from its output, so
        # it is only given the columns with at least one known value;
        # otherwise the result would no longer line up with the frame's
        # columns. Positions are used instead of labels so that duplicate
        # column names are handled too.
        positions = [i for i, known in enumerate(has_known_values) if known]
        imputer = KNNImputer(n_neighbors=5)
        filled.iloc[:, positions] = imputer.fit_transform(filled.iloc[:, positions])
        # No further fallback is needed here: KNNImputer falls back to the
        # column mean when no neighbour qualifies, so it never leaves a NaN
        # in a column it was given.

    if not has_known_values.all():
        import warnings

        empty_columns = filled.columns[~has_known_values].tolist()
        warnings.warn(
            "fill_missing_values: columns with no known values were left "
            f"unfilled: {empty_columns}",
            RuntimeWarning,
            stacklevel=2,
        )
    return filled

# def get_weather_data(api_url, latitude, longitude, start_date, end_date, prob=0.5):
#     payload = {
#         "latitude": latitude,
#         "longitude": longitude,
#         "start_date": start_date,
#         "end_date": end_date,
#         "prob": prob
#     }
#     headers = {'Content-Type': 'application/json'}
#     response = requests.post(api_url, data=json.dumps(payload), headers=headers)
    
#     if response.status_code == 200:
#         # Convert the JSON response to a DataFrame
#         weather_data = json.loads(response.text)
#         weather_df = pd.DataFrame(data=weather_data['data'], columns=weather_data['columns'])
#         return weather_df
#     else:
#         raise Exception(f"Error {response.status_code}: {response.text}")


if __name__ == "__main__":
    # api_url = "http://localhost:8090"  # URL of the FastAPI service
    # latitude = 52.52
    # longitude = 13.41
    # start_date = "2024-05-02"
    # end_date = "2024-05-16"
    # prob = 0.5
    
    # try:
    #     weather_df = get_weather_data(f"{api_url}/weather-data", latitude, longitude, start_date, end_date, prob)
    #     print(weather_df.head())  # Display the first few rows of the DataFrame
    #     # Generate example DataFrames for mean and interpolation
    #     mean_df = weather_df.fillna(weather_df.mean())
    #     interp_df = weather_df.fillna(weather_df.mean())


    # except Exception as e:
    #     print(f"An error occurred: {e}")

    # Local import: only this script section needs it.
    from io import StringIO

    url2 = 'http://localhost:8092/process-csv'
    url3 = "http://localhost:8092/rmse"
    # Row offset of the current batch; it is sent along with every filled
    # batch and advanced by the batch length afterwards.
    start = 0

    # Receive the batches one by one and request an RMSE calculation for each.
    # The streamed response is used as a context manager so the connection is
    # released even if processing a batch fails.
    with requests.post(url2, stream=True) as response:
        if response.status_code == 200:
            with jsonlines.Reader(response.iter_lines()) as reader:
                for line in reader:
                    # Rebuild the batch frame from its 'split'-oriented JSON.
                    # read_json is given a file-like object because passing
                    # a literal JSON string is deprecated in pandas.
                    batch_json = json.dumps(line['data'])
                    batch_df = pd.read_json(StringIO(batch_json), orient='split')

                    # Placeholder imputation step.
                    without_nan = fill_missing_values(batch_df)

                    # Send the filled batch for the RMSE calculation.
                    filled_data = {
                        "data": json.loads(without_nan.to_json(orient='split')),
                        "service_class": "CSVProcessor",
                        "start": start,
                    }
                    rmse_response = requests.post(url3, json=filled_data)
                    if not rmse_response.ok:
                        # Report a failed RMSE request instead of silently
                        # ignoring it; the remaining batches are still sent.
                        print("Ошибка при расчете RMSE:", rmse_response.status_code)
                    start += len(batch_df)
        else:
            print("Ошибка при получении данных:", response.status_code)

    

    
    # # Example: compare two identical files
    # start = 0
    # csv_file_path='data/weather_data.csv'
    # rmse_data = pd.read_csv(csv_file_path, skiprows=3, index_col='time')

    # for start in range(0, len(rmse_data), 100):
    #     batch = rmse_data.iloc[start:start + 100]
    #     data_json = batch.to_json(orient='split', date_format='iso')
    #     filled_data = {
    #         "data": json.loads(data_json),
    #         "service_class": "CSVProcessor",
    #         "start": start
    #     }
    #     response = requests.post(url3, json=filled_data)


# Notebook output: KeyboardInterrupt