In [2]:
import pandas as pd 
import os


In [5]:
# Raw string: in a normal literal "\S" and "\A" are invalid escape sequences
# (a warning today, planned to become a syntax error). The resulting path is
# byte-for-byte the same as before.
# NOTE(review): absolute, machine-specific path; consider a configurable
# data directory so the notebook runs on other machines.
os.chdir(r"D:\Stackerbee\AgriCrops")

In [11]:


class CSVDataReader:
    """Load a CSV file into a pandas DataFrame and print summaries of it.

    Problems are reported by printing a message rather than raising, except
    where a method's docstring says otherwise.
    """

    def __init__(self, file_path):
        """Store the path to read; nothing is loaded until
        read_csv_to_dataframe() is called.

        Args:
            file_path: Path passed unchanged to pandas.read_csv.
        """
        self.file_path = file_path
        # Stays None until a read succeeds; every other method checks this.
        self.data_frame = None

    def _has_data(self):
        """Return True when a DataFrame is loaded; otherwise print the
        standard "load data first" notice and return False."""
        if self.data_frame is None:
            print("DataFrame is empty. Please load data first.")
            return False
        return True

    def read_csv_to_dataframe(self, **read_csv_kwargs):
        """Read self.file_path into self.data_frame.

        Args:
            **read_csv_kwargs: Optional keyword arguments forwarded to
                pandas.read_csv. With none given the call is exactly
                pd.read_csv(self.file_path), so the first row is used as the
                header and no rows are skipped. Pass e.g. header=None and
                names=[...] for a file that has no header row.

        On failure a message is printed and self.data_frame is left as it
        was before the call.
        """
        try:
            self.data_frame = pd.read_csv(self.file_path, **read_csv_kwargs)
            print(f"CSV file '{self.file_path}' successfully loaded into DataFrame.")
        except FileNotFoundError:
            print(f"Error: CSV file '{self.file_path}' not found.")
        except Exception as e:
            print(f"An error occurred while reading CSV file: {e}")

    def display_dataframe_info(self):
        """Print the DataFrame.info() summary of the loaded data."""
        if self._has_data():
            print("\nDataFrame Info:")
            # info() writes its report to stdout itself and returns None, so
            # wrapping it in print() would emit an extra "None" line.
            self.data_frame.info()

    def clean_data_types(self):
        """Coerce 'Arrivals (Tonnes)' to numeric and 'Reported Date' to
        datetime, in place on self.data_frame.

        Values that cannot be converted become NaN / NaT (errors='coerce').
        Dates are parsed with the format '%d-%b-%y'.

        Raises:
            KeyError: if either column is missing from the DataFrame.
        """
        if self._has_data():
            self.data_frame['Arrivals (Tonnes)'] = pd.to_numeric(
                self.data_frame['Arrivals (Tonnes)'], errors='coerce'
            )
            self.data_frame['Reported Date'] = pd.to_datetime(
                self.data_frame['Reported Date'], errors='coerce', format='%d-%b-%y'
            )
            print("Data types cleaned successfully.")

    def display_dataframe_head(self, num_rows=5):
        """Print the first num_rows rows of the loaded data.

        Args:
            num_rows: Number of leading rows to show (default 5).
        """
        if self._has_data():
            print(f"\nDataFrame Head (First {num_rows} rows):")
            print(self.data_frame.head(num_rows))

    def show_column_data_types(self):
        """Print each column label (whitespace-trimmed) with its dtype."""
        if self._has_data():
            print("\nColumn Names and Data Types:")
            for column_name, data_type in self.data_frame.dtypes.items():
                # Labels are not always strings (e.g. integers when the file
                # is read with header=None), so convert before trimming.
                cleaned_column_name = str(column_name).strip()
                print(f"Column: {cleaned_column_name} - Data Type: {data_type}")

    # NOTE(review): the save method below is left disabled, as in the
    # original. If re-enabled it writes to self.file_path, i.e. it overwrites
    # the file the data was read from.
    # def save_dataframe_to_csv(self):
    #     if self.data_frame is not None:
    #         try:
    #             # Save the DataFrame back to the original CSV file, overwriting existing data
    #             self.data_frame.to_csv(self.file_path, index=False)
    #             print(f"DataFrame successfully saved to CSV file '{self.file_path}'.")
    #         except Exception as e:
    #             print(f"Error occurred while saving DataFrame to CSV file: {e}")
    #     else:
    #         print("DataFrame is empty. Please load and clean data first.")

# Example usage:
if __name__ == "__main__":
    # Demo driver: load one file and print its summary, dtypes and head.

    # The original literal was 'test.xls"'; the trailing double quote was a
    # typo that made the path impossible to find, so every step below only
    # printed "DataFrame is empty".
    # NOTE(review): read_csv parses delimited text. If test.xls is a real
    # Excel workbook, pd.read_excel is the right reader; confirm the file's
    # actual format.
    csv_reader = CSVDataReader('test.xls')

    # Read CSV file into DataFrame
    csv_reader.read_csv_to_dataframe()

    # Display DataFrame information
    csv_reader.display_dataframe_info()

    #  # Clean data types for specific columns
    # csv_reader.clean_data_types()

    # Show column data types
    csv_reader.show_column_data_types()

    # Display head of DataFrame with updated columns
    csv_reader.display_dataframe_head()

    # save_dataframe_to_csv is commented out in CSVDataReader, so calling it
    # raised AttributeError at the end of the cell. Keep the call disabled
    # until the method is restored.
    # csv_reader.save_dataframe_to_csv()


CSV file 'Chennagi Dal.csv' successfully loaded into DataFrame.

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Madhya Pradesh  80 non-null     object 
 1   Badwani         80 non-null     object 
 2   Balwadi         80 non-null     object 
 3   Gram Kata       80 non-null     object 
 4   Pulses          80 non-null     object 
 5   47.22           80 non-null     float64
 6   3190.0          80 non-null     float64
 7   3220.0          80 non-null     float64
 8   3204.0          80 non-null     float64
 9   2018-07-10      80 non-null     object 
dtypes: float64(4), object(6)
memory usage: 6.5+ KB
None

Column Names and Data Types:
Column: Madhya Pradesh - Data Type: object
Column: Badwani - Data Type: object
Column: Balwadi - Data Type: object
Column: Gram Kata - Data Type: object
Column: Pulses - Data Type: object
C