In [2]:
import pandas as pd
import os
from dawn_vok.utils.dir_utils import DirUtils

class IMSProviderParser:
    def __init__(self):
        self.df = None
        self.dir_path = DirUtils.get_raw_data_dir('provider/raw/ims')

    def load_csv(self,  file_name):
        """
        Load a CSV file into a pandas DataFrame.

        Parameters:
        file_path (str): The path to the CSV file.

        Returns:
        pd.DataFrame: The loaded DataFrame.
        """
        path = os.path.join(self.dir_path, file_name)
        try:
            self.df = pd.read_csv(path)
            return self.df
        except Exception as e:
            print(f"Failed to load file: {e}")
            return None
        
    def load_excel(self,  file_name):
        """
        Load an Excel file into a pandas DataFrame.

        Parameters:
        file_name (str): The name of the Excel file to load.

        Returns:
        pd.DataFrame: The loaded DataFrame.
        """
        path = os.path.join(self.dir_path, file_name)
        try:
            self.df = pd.read_excel(path)
            return self.df
        except Exception as e:
            print(f"Failed to load file: {e}")
            return None
            


In [3]:
imspp = IMSProviderParser()
imspp.load_csv('data_202504081930.csv')

Failed to load file: [Errno 2] No such file or directory: '/home/amiz/dawn/dawn_data/raw_data/provider/raw/ims/data_202504081930.csv'


In [5]:
import json
from dawn_vok.db.mongo_utils import MongoUtils


imspp = IMSProviderParser()
# imspp.load( 'data_202504081930.csv')
df = imspp.load_excel('ims_station.xlsx')
df = df[1:]
rcol = [
    "count",
    "old_api_id",
    "api_id",
    "api_rain_id",
    "heb_name",
    "name",
    "nig_e",
    "nig_n",
    "lat",
    "lon",
    "alt",
    "date",
    "variables",
    "shelter",
    "height",
    "wind_gauge_height",
    "notes"
]
df.columns
df.rename(columns=dict(zip(df.columns, rcol)), inplace=True)
df['name'] = df['name'].str.strip().str.replace(' ', '_').str.lower()
df['_id'] = 'ims_' + df['name'].astype(str) + '_' + df['api_id'].astype(str)


df.head()
di = df.to_dict(orient='records')
pt = DirUtils.get_raw_data_path('ims_stations.json', 'provider/raw/ims')
with open(pt, 'w') as f:
    #encode allowing hebrew characters
    js = json.dumps(di, default=str, ensure_ascii=False)
    f.write(js)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns=dict(zip(df.columns, rcol)), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['name'] = df['name'].str.strip().str.replace(' ', '_').str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_id'] = 'ims_' + df['name'].astype(str) + '_' + df['api_id'].astype(str)


In [41]:


from dawn_vok.raw_data.plugins.data_plugin import IMSDataPlugin


plugin = IMSDataPlugin()
df = plugin.prepare_data('data_202504081940.csv')
print(df)

              Station Date & Time (UTC) Diffused radiation (W/m^2)  \
0      Haifa Technion  01/06/2023 00:00                          0   
1      Haifa Technion  01/06/2023 00:10                          0   
2      Haifa Technion  01/06/2023 00:20                          0   
3      Haifa Technion  01/06/2023 00:30                          0   
4      Haifa Technion  01/06/2023 00:40                          0   
...               ...               ...                        ...   
52647  Haifa Technion  01/06/2024 22:20                          0   
52648  Haifa Technion  01/06/2024 22:30                          0   
52649  Haifa Technion  01/06/2024 22:40                          0   
52650  Haifa Technion  01/06/2024 22:50                          0   
52651  Haifa Technion  01/06/2024 23:00                          0   

      Global radiation (W/m^2) Direct radiation (W/m^2) Relative humidity (%)  \
0                            0                        0                    48 

  self.df = pd.read_csv(path)


In [44]:
df.columns
# 'Station', 'Date & Time (UTC)', 'Relative humidity (%)',
#        'Temperature (°C)', 'Maximum temperature (°C)',
#        'Minimum temperature (°C)', 'Grass temperature (°C)', 'Rainfall (mm)'
# 'Station', 'Date & Time (UTC)', 'Diffused radiation (W/m^2)',
#        'Global radiation (W/m^2)', 'Direct radiation (W/m^2)',
#        'Relative humidity (%)', 'Temperature (°C)', 'Maximum temperature (°C)',
#        'Minimum temperature (°C)', 'Wet Temperature (°C)',
#        'Wind direction (°)', 'Gust wind direction (°)', 'Wind speed (m/s)',
#        'Maximum 1 minute wind speed (m/s)',
#        'Maximum 10 minutes wind speed (m/s)',
#        'Time ending maximum 10 minutes wind speed (hhmm)',
#        'Gust wind speed (m/s)', 'Standard deviation wind direction (°)',
#        'Rainfall (mm)'],
col_names ={'Station':'station', 'Date & Time (UTC)':'datetime', 'Relative humidity (%)':'humidity', 'Temperature (°C)':'temperature', 'Maximum temperature (°C)':'max_temperature', 'Minimum temperature (°C)':'min_temperature', 
            'Grass temperature (°C)':'grass_temperature', 'Rainfall (mm)':'rainfall',
            'Diffused radiation (W/m^2)':'diffused_radiation', 'Global radiation (W/m^2)':'global_radiation', 'Direct radiation (W/m^2)':'direct_radiation',
            'Wind direction (°)':'wind_direction', 'Gust wind direction (°)':'gust_wind_direction', 'Wind speed (m/s)':'wind_speed',
            'Maximum 1 minute wind speed (m/s)':'max_1_minute_wind_speed', 'Maximum 10 minutes wind speed (m/s)':'max_10_minute_wind_speed',
            'Time ending maximum 10 minutes wind speed (hhmm)':'time_ending_max_10_minute_wind_speed',
            'Gust wind speed (m/s)':'gust_wind_speed', 'Standard deviation wind direction (°)':'standard_deviation_wind_direction',
            'Wet Temperature (°C)':'wet_temperature'
            }
print(df.columns)
df.rename(columns=col_names, inplace=True)
print(df.columns)
df.set_index('datetime', inplace=True, drop=False)
df.index = pd.to_datetime(df.index, format='%d/%m/%Y %H:%M')
df.head()
# df.head()
# df.tail()
# df.info()
df.describe()


Index(['station', 'datetime', 'diffused_radiation', 'global_radiation',
       'direct_radiation', 'humidity', 'temperature', 'max_temperature',
       'min_temperature', 'Wet Temperature (°C)', 'wind_direction',
       'gust_wind_direction', 'wind_speed', 'max_1_minute_wind_speed',
       'max_10_minute_wind_speed', 'time_ending_max_10_minute_wind_speed',
       'gust_wind_speed', 'standard_deviation_wind_direction', 'rainfall'],
      dtype='object')
Index(['station', 'datetime', 'diffused_radiation', 'global_radiation',
       'direct_radiation', 'humidity', 'temperature', 'max_temperature',
       'min_temperature', 'wet_temperature', 'wind_direction',
       'gust_wind_direction', 'wind_speed', 'max_1_minute_wind_speed',
       'max_10_minute_wind_speed', 'time_ending_max_10_minute_wind_speed',
       'gust_wind_speed', 'standard_deviation_wind_direction', 'rainfall'],
      dtype='object')


Unnamed: 0,rainfall
count,52652.0
mean,0.019023
std,0.232269
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,15.0


In [39]:
full_range = pd.date_range(
        start=df.index.min().floor('D'),
        end=df.index.max().ceil('D') - pd.Timedelta(minutes=10),
        freq='10min'
    )

df1 = df.copy()
    # Set 'date' as index and reindex with full range
df1 = df1.reindex(full_range)
df1 = df1.interpolate(method='linear')
df1.head()
df1.describe()
df1.isnull().sum()
#fill nans with -5
df1.fillna(-5, inplace=True)
df1.isnull().sum()


  df1 = df1.interpolate(method='linear')


station              0
datetime             0
humidity             0
temperature          0
max_temperature      0
min_temperature      0
grass_temperature    0
rainfall             0
dtype: int64

In [73]:
from sentence_transformers import SentenceTransformer
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
embedding = model.encode("Tel Aviv is a coastal city with Mediterranean climate and high-tech agriculture.")
print("Embedding shape:", embedding.shape)


RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [74]:
import matplotlib
print("Backend:", matplotlib.get_backend())


Backend: module://matplotlib_inline.backend_inline


In [70]:
import torch
import torch.nn as nn

# Check if CUDA is available and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Dummy model
model = nn.Sequential(
    nn.Linear(10, 5),
    nn.ReLU(),
    nn.Linear(5, 2)
).to(device)

# Dummy input
x = torch.randn(1, 10).to(device)

# Run forward pass
with torch.no_grad():
    output = model(x)

print("Output:", output)


Using device: cuda


RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [72]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("Torch CUDA version:", torch.version.cuda)

if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("Compute Capability:", torch.cuda.get_device_capability(0))


Torch version: 2.5.1+cu121
CUDA available: True
Torch CUDA version: 12.1
GPU: NVIDIA GeForce RTX 5070 Ti
Compute Capability: (12, 0)


In [66]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version (from torch):", torch.version.cuda)
print("GPU name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

Torch version: 2.5.1+cu121
CUDA available: True
CUDA version (from torch): 12.1
GPU name: NVIDIA GeForce RTX 5070 Ti
