In [10]:
import numpy as np

In [11]:
class ClothingRecommendationEnv:
    """Toy environment for clothing recommendation.

    A user profile is encoded as a 4-element state vector
    ``[height, gender_code, style_code, skin_tone]`` and rewards are looked up
    in a randomly filled table with 100 rows (state buckets) and 4 columns
    (actions).
    """

    def __init__(self, body_measurements, gender, style_preference, skin_tone):
        """Store the profile, then build the initial state and the reward table.

        Args:
            body_measurements: Mapping that must contain a ``'height'`` entry.
            gender: ``'male'`` is encoded as 1, every other value as 0.
            style_preference: ``'full_body'`` -> 0, ``'upper_body'`` -> 1,
                anything else -> 2.
            skin_tone: Stored unchanged as the last state component.
        """
        self.body_measurements = body_measurements
        self.gender = gender
        self.style_preference = style_preference
        self.skin_tone = skin_tone
        # The state is built before the rewards; only the reward table draws
        # from NumPy's global random generator.
        self.state = self._get_initial_state()
        self.rewards = self._initialize_rewards()

    def _get_initial_state(self):
        """Return the profile encoded as a 1-D NumPy array of four entries."""
        height = self.body_measurements['height']

        if self.gender == 'male':
            gender_code = 1
        else:
            gender_code = 0

        if self.style_preference == 'full_body':
            style_code = 0
        elif self.style_preference == 'upper_body':
            style_code = 1
        else:
            style_code = 2

        return np.array([height, gender_code, style_code, self.skin_tone])

    def _initialize_rewards(self):
        """Return a 100 x 4 table of uniform random rewards in [0, 1)."""
        # Example sizing: 100 state buckets, 4 possible actions per bucket.
        return np.random.rand(100, 4)

    def reset(self):
        """Rebuild the state from the stored profile and return it."""
        fresh_state = self._get_initial_state()
        self.state = fresh_state
        return fresh_state

    def step(self, action):
        """Look up the reward for ``action``; the state itself never changes.

        Returns:
            A ``(state, reward)`` pair, where ``state`` is the current state
            array and ``reward`` is the table entry for the state's bucket
            and the given action column.
        """
        current_state = self.state
        # Simple hash: the integer sum of the state components, modulo the
        # number of rows in the reward table.
        bucket = int(sum(current_state)) % 100
        return current_state, self.rewards[bucket, action]

# Example usage: build an environment for one sample user profile.
body_measurements = dict(height=170)
gender = 'female'
style_preference = 'upper_body'
skin_tone = 4  # Example value for skin tone
env = ClothingRecommendationEnv(
    body_measurements=body_measurements,
    gender=gender,
    style_preference=style_preference,
    skin_tone=skin_tone,
)

In [12]:
class SimpleNN:
    """Two-layer fully connected network: ReLU hidden layer, linear output.

    ``forward`` caches its intermediate values (``z1``, ``a1``, ``z2``) on the
    instance, and ``backward`` reads that cache, so ``backward`` always refers
    to the most recent forward pass.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create standard-normal weight matrices and zero bias vectors."""
        # w1 is drawn before w2 so the random stream is consumed in a fixed order.
        self.w1 = np.random.randn(input_dim, hidden_dim)
        self.w2 = np.random.randn(hidden_dim, output_dim)
        self.b1 = np.zeros(hidden_dim)
        self.b2 = np.zeros(output_dim)

    def forward(self, x):
        """Run the network on ``x``, cache the intermediates, return the output."""
        pre_activation = np.dot(x, self.w1) + self.b1
        hidden = np.maximum(0, pre_activation)  # ReLU
        self.z1, self.a1 = pre_activation, hidden

        output = np.dot(hidden, self.w2) + self.b2
        self.z2 = output
        return output

    def backward(self, x, y, y_pred, learning_rate):
        """Apply one gradient step in place using the error ``y_pred - y``.

        Args:
            x: Input batch; it is transposed, so it must be 2-D to line up
                with the cached activations.
            y: Values subtracted from ``y_pred`` to form the output error.
            y_pred: Network output for ``x``.
            learning_rate: Step size applied to every parameter.
        """
        output_err = y_pred - y

        # Push the error back through the output layer and the ReLU; units
        # whose pre-activation was non-positive pass no gradient.
        hidden_err = np.dot(output_err, self.w2.T)
        hidden_err[self.z1 <= 0] = 0

        # Every gradient is computed before any parameter is modified.
        updates = (
            (self.w1, np.dot(x.T, hidden_err)),
            (self.b1, np.sum(hidden_err, axis=0)),
            (self.w2, np.dot(self.a1.T, output_err)),
            (self.b2, np.sum(output_err, axis=0)),
        )
        for param, grad in updates:
            param -= learning_rate * grad  # in-place update of the array

    def predict(self, x):
        """Alias for ``forward``; it also refreshes the cached intermediates."""
        return self.forward(x)

# Example usage: a network with 4 inputs, 10 hidden units and 4 outputs.
nn = SimpleNN(
    input_dim=4,
    hidden_dim=10,
    output_dim=4,
)


In [13]:
class Agent:
    """Epsilon-greedy Q-learning agent whose Q-function is a ``SimpleNN``.

    The agent talks to an environment exposing ``reset() -> state`` and
    ``step(action) -> (next_state, reward)``. States are passed to the network
    as-is (no feature scaling is applied here), wrapped into a batch of one.
    """

    # Sizes of the state vector fed to the network and of the action space.
    STATE_DIM = 4
    N_ACTIONS = 4

    def __init__(self, env, learning_rate=0.001, gamma=0.99, epsilon=0.2):
        """Create the agent and its Q-network.

        Args:
            env: Environment providing ``reset()`` and ``step(action)``.
            learning_rate: Step size handed to the network's ``backward``.
            gamma: Discount factor applied to the best next-state Q-value.
            epsilon: Probability of picking a uniformly random action.
        """
        self.env = env
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_network = SimpleNN(
            input_dim=self.STATE_DIM, hidden_dim=10, output_dim=self.N_ACTIONS
        )
        self.learning_rate = learning_rate

    def choose_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a random action is drawn from the whole
        action space; otherwise the action with the highest predicted
        Q-value is returned.
        """
        if np.random.rand() < self.epsilon:
            # Explore over every action, including the last one.
            return np.random.choice(self.N_ACTIONS)
        q_values = self.q_network.predict(state)
        return np.argmax(q_values)

    def learn(self, state, action, reward, next_state):
        """Perform one Q-learning update for a single transition.

        Args:
            state: Batched state of shape ``(1, STATE_DIM)``.
            action: Index of the action that was taken.
            reward: Reward received for that action.
            next_state: Batched successor state, same shape as ``state``.
        """
        # Evaluate the successor first: the network caches the activations of
        # its most recent forward pass, and backward() must see those of
        # ``state``, not of ``next_state``.
        next_q_values = self.q_network.predict(next_state)
        q_values = self.q_network.predict(state)

        # TD target: identical to the prediction except for the taken action.
        target = q_values.copy()
        target[0, action] = reward + self.gamma * np.max(next_q_values)

        # backward(x, y, y_pred, lr) computes the error as ``y_pred - y``, so
        # the target goes in as ``y`` and the prediction as ``y_pred``; this
        # makes the update a descent step towards the target.
        self.q_network.backward(state, target, q_values, self.learning_rate)

    def train(self, episodes=10, max_steps=100):
        """Run ``episodes`` training episodes against the environment.

        An episode ends as soon as a step leaves the state unchanged, or after
        ``max_steps`` steps, whichever comes first. The cap guarantees
        termination on environments whose state keeps changing.

        Args:
            episodes: Number of episodes to run.
            max_steps: Upper bound on the number of steps per episode.
        """
        for _ in range(episodes):
            state = np.expand_dims(self.env.reset(), axis=0)
            for _ in range(max_steps):
                action = self.choose_action(state)
                next_state, reward = self.env.step(action)
                next_state = np.expand_dims(next_state, axis=0)
                self.learn(state, action, reward, next_state)
                if np.array_equal(state, next_state):
                    # No state change: treat it as the end of the episode.
                    break
                state = next_state

    def recommend(self, state):
        """Return the greedy (highest Q-value) action for an unbatched ``state``."""
        state = np.expand_dims(state, axis=0)
        q_values = self.q_network.predict(state)
        return np.argmax(q_values)


In [14]:
# Step 0: Seed NumPy's global random generator. The reward table, the network
# initialisation and the epsilon-greedy exploration all draw from it, so
# without a seed the printed recommendation changes on every run.
SEED = 42
np.random.seed(SEED)

# Step 1: Mock Data
body_measurements = {'height': 165}
gender = 'female'
style_preference = 'upper_body'
skin_tone = 1  # Example value for skin tone

# Step 2: Initialize the Environment
env = ClothingRecommendationEnv(body_measurements, gender, style_preference, skin_tone)

# Step 3: Train the Agent
agent = Agent(env)
agent.train(episodes=10)

# Step 4: Make a Recommendation (greedy action for the initial state)
current_state = env.reset()
recommendation = agent.recommend(current_state)
print(f"Recommended action: {recommendation}")

Recommended action: 3
