In [1]:
import pandas as pd

def get_csv_column_names(file_path):
    df = pd.read_csv(file_path)
    return df.columns.tolist()

# Usage
file_path = 'Almond1.csv'
columns = get_csv_column_names(file_path)
print(columns)

['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Min_Price', 'Max_Price', 'Modal_Price', 'Price_Date']


In [2]:
import pandas as pd
# Assuming your CSV file is named 'data.csv'
df = pd.read_csv('Almond1.csv')
# Get the unique values from the 'District_Name' column
unique_district_names = df['District_Name'].unique()
# Print the unique district names
print(unique_district_names)

['Amarawati' 'Aurangabad' 'Chandrapur' 'Mumbai' 'Jalgaon' 'Kolhapur'
 'Nagpur' 'Nashik' 'Pune' 'Sangli' 'Sholapur' 'Ahmednagar' 'Satara'
 'Nandurbar' 'Osmanabad' 'Raigad' 'Ratnagiri' 'Buldhana' 'Jalana' 'Latur'
 'Wardha' 'Nanded' 'Dhule']


In [3]:
import pandas as pd
# Assuming your CSV file is named 'data.csv'
df = pd.read_csv('Almond1.csv')
# Get the unique values from the 'District_Name' column
unique_district_names = df['Commodity'].unique()
# Print the unique district names
print(unique_district_names)

['Apple' 'Carrot' 'Cotton' 'Groundnut' 'Mango' 'Almond(Badam)' 'Banana'
 'Methi(Leaves)' 'Brinjal' 'Bajra(Pearl Millet/Cumbu)' 'Seetapal'
 'Sabu Dan' 'Sweet Potato']


### MIN PRICE (Training)

In [4]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import pickle

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('Almond1.csv')

# Extract the features (first 6 labels) and the target variable 'Modal_Price'
X = df[['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Price_Date']]
y = df['Min_Price']

# Feature engineering: Extract month and year from Price_Date
X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
X.drop('Price_Date', axis=1, inplace=True)

# List of categorical columns and numerical columns
categorical_features = ['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Month', 'Year']
numerical_features = ['Month', 'Year']

# Create pipelines for preprocessing
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Use ColumnTransformer to apply preprocessing to selected categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', 'passthrough', numerical_features)
    ],
    remainder='drop'
)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with preprocessing and linear regression model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
model.fit(X_train, y_train)

# Save the trained model to a file
with open('minprice.pkl', 'wb') as file:
    pickle.dump(model, file)


  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop('Price_Date', axis=1, inplace=True)


### MIN PRICE (Testing)

In [5]:
import pandas as pd
import pickle

# Load the input data for prediction
input_row = {
    'District_Name': 'Chikmagalur',
    'Market_Name': 'Chikkamagalore',
    'Commodity': 'Antawala',
    'Variety': 'Antawala',
    'Grade': 'FAQ',
    'Price_Date': '9/6/2025'
}

# Load the saved model
with open('minprice.pkl', 'rb') as file:
    model = pickle.load(file)

# Create a DataFrame with the input row
input_df = pd.DataFrame([input_row])

# Feature engineering: Extract month and year from Price_Date
input_df['Month'] = pd.to_datetime(input_df['Price_Date']).dt.month
input_df['Year'] = pd.to_datetime(input_df['Price_Date']).dt.year
input_df.drop('Price_Date', axis=1, inplace=True)

# Make prediction using the loaded model
prediction = model.predict(input_df)
print("Predicted Min Price:", prediction[0])


Predicted Min Price: 8135.153414625267


### MAX PRICE (Training)

In [6]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import pickle

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('Almond1.csv')

# Extract the features (first 6 labels) and the target variable 'Modal_Price'
X = df[['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Price_Date']]
y = df['Max_Price']

# Feature engineering: Extract month and year from Price_Date
X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
X.drop('Price_Date', axis=1, inplace=True)

# List of categorical columns and numerical columns
categorical_features = ['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Month', 'Year']
numerical_features = ['Month', 'Year']

# Create pipelines for preprocessing
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Use ColumnTransformer to apply preprocessing to selected categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', 'passthrough', numerical_features)
    ],
    remainder='drop'
)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with preprocessing and linear regression model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
model.fit(X_train, y_train)

# Save the trained model to a file
with open('maxprice.pkl', 'wb') as file:
    pickle.dump(model, file)


  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop('Price_Date', axis=1, inplace=True)


### MAX PRICE (Testing)

In [7]:
import pandas as pd
import pickle

# Load the input data for prediction
input_row = {
   'District_Name': 'Chikmagalur',
    'Market_Name': 'Chikkamagalore',
    'Commodity': 'Antawala',
    'Variety': 'Antawala',
    'Grade': 'FAQ',
    'Price_Date': '9/6/2025'
}

# Load the saved model
with open('maxprice.pkl', 'rb') as file:
    model = pickle.load(file)

# Create a DataFrame with the input row
input_df = pd.DataFrame([input_row])

# Feature engineering: Extract month and year from Price_Date
input_df['Month'] = pd.to_datetime(input_df['Price_Date']).dt.month
input_df['Year'] = pd.to_datetime(input_df['Price_Date']).dt.year
input_df.drop('Price_Date', axis=1, inplace=True)

# Make prediction using the loaded model
prediction = model.predict(input_df)
print("Predicted Max Price:", prediction[0])

Predicted Max Price: 11324.112440075027


### MODAL PRICE (Training)

In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import pickle

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('Almond1.csv')

# Extract the features (first 6 labels) and the target variable 'Modal_Price'
X = df[['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Price_Date']]
y = df['Modal_Price']

# Feature engineering: Extract month and year from Price_Date
X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
X.drop('Price_Date', axis=1, inplace=True)

# List of categorical columns and numerical columns
categorical_features = ['District_Name', 'Market_Name', 'Commodity', 'Variety', 'Grade', 'Month', 'Year']
numerical_features = ['Month', 'Year']

# Create pipelines for preprocessing
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Use ColumnTransformer to apply preprocessing to selected categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', 'passthrough', numerical_features)
    ],
    remainder='drop'
)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with preprocessing and linear regression model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
model.fit(X_train, y_train)

# Save the trained model to a file
with open('modalprice.pkl', 'wb') as file:
    pickle.dump(model, file)


  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Month'] = pd.to_datetime(X['Price_Date']).dt.month
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Year'] = pd.to_datetime(X['Price_Date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop('Price_Date', axis=1, inplace=True)


### MODAL PRICE (Testing)

In [9]:
import pandas as pd
import pickle

# Load the input data for prediction
input_row = {
    'District_Name': 'Chikmagalur',
    'Market_Name': 'Chikkamagalore',
    'Commodity': 'Antawala',
    'Variety': 'Antawala',
    'Grade': 'FAQ',
    'Price_Date': '23/7/2025'
}

# Load the saved model
with open('modalprice.pkl', 'rb') as file:
    model = pickle.load(file)

# Create a DataFrame with the input row
input_df = pd.DataFrame([input_row])

# Feature engineering: Extract month and year from Price_Date
input_df['Month'] = pd.to_datetime(input_df['Price_Date']).dt.month
input_df['Year'] = pd.to_datetime(input_df['Price_Date']).dt.year
input_df.drop('Price_Date', axis=1, inplace=True)

# Make prediction using the loaded model
prediction = model.predict(input_df)
print("Predicted Modal Price:", prediction[0])

Predicted Modal Price: 9659.693254036916


  input_df['Month'] = pd.to_datetime(input_df['Price_Date']).dt.month
  input_df['Year'] = pd.to_datetime(input_df['Price_Date']).dt.year
